Beispiel #1
0
 def test_blastp(self):
     """Run a pairwise BLASTP search and sanity-check the report.

     Builds the command line wrapper, confirms its string form, runs it
     through subprocess and counts marker strings in the captured stdout.
     """
     global exe_names
     blastp_exe = exe_names["blastp"]
     cline = Applications.NcbiblastpCommandline(
         blastp_exe,
         query="Fasta/rose.pro",
         subject="GenBank/NC_005816.faa",
         evalue=1,
     )
     # The wrapper must render to exactly this command string.
     expected_cmd = (
         _escape_filename(blastp_exe)
         + " -query Fasta/rose.pro -evalue 1"
         + " -subject GenBank/NC_005816.faa"
     )
     self.assertEqual(str(cline), expected_cmd)
     # Run the tool, capturing both output streams as text.
     use_shell = sys.platform != "win32"
     child = subprocess.Popen(
         str(cline),
         stdout=subprocess.PIPE,
         stderr=subprocess.PIPE,
         universal_newlines=True,
         shell=use_shell,
     )
     stdoutdata, stderrdata = child.communicate()
     return_code = child.returncode
     failure_note = "Got error code %i back from:\n%s" % (return_code, cline)
     self.assertEqual(return_code, 0, failure_note)
     self.assertEqual(10, stdoutdata.count("Query= "))
     no_hits = stdoutdata.count("***** No hits found *****")
     # A count of 7 happens with BLAST 2.2.26+ (potentially a bug there);
     # any other count is required to be 9.
     if no_hits != 7:
         self.assertEqual(9, no_hits)
Beispiel #2
0
def get_emboss_version():
    """Return the EMBOSS version as a tuple of three ints, e.g. (6, 1, 0).

    Runs the ``embossversion`` executable with stderr merged into stdout and
    scans the output line by line. Blank lines and the known banner lines
    are skipped; the first line containing two or three dots is parsed as
    the version (a four part version such as 6.2.0.1 is cut to three parts).

    Raises:
        MissingExternalDependencyError: if a line cannot be understood as a
            version (including a dotted line that is not numeric), or if
            there was no usable output at all.
    """
    # Windows and Unix versions of EMBOSS seem to differ in
    # which lines go to stdout and stderr - so merge them.
    child = subprocess.Popen(_escape_filename(exes["embossversion"]),
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT,
                             universal_newlines=True,
                             shell=(sys.platform != "win32"))
    stdout, stderr = child.communicate()
    child.stdout.close()  # This is both stdout and stderr
    del child
    assert stderr is None  # Send to stdout instead
    for line in stdout.split("\n"):
        text = line.strip()
        if not text:
            # Skip blank lines (splitting always yields at least one entry,
            # even for empty output) so the final error below is reachable.
            continue
        if text == "Reports the current EMBOSS version number":
            continue
        if line.startswith("Writes the current EMBOSS version number"):
            continue
        if line.count(".") in (2, 3):
            # e.g. mEMBOSS-6.2.0.1-setup.exe reports 6.2.0.1 - for this
            # return (6, 2, 0). Three part versions are unaffected by [:3].
            try:
                return tuple(int(v) for v in text.split("."))[:3]
            except ValueError:
                # Dotted text that is not a version (e.g. an error message);
                # report it as a missing dependency below.
                pass
        # Either we can't understand the output, or this is really
        # an error message not caught earlier (e.g. not in English)
        raise MissingExternalDependencyError(
            "Install EMBOSS if you want to use Bio.Emboss (%s)."
            % line)
    # In case there was no output at all...
    raise MissingExternalDependencyError("Could not get EMBOSS version")
Beispiel #3
0
 def test_Prank_simple_with_NEXUS_output(self):
     """Round-trip the input file through PRANK with NEXUS output.

     output.?.??? files written to cwd - no way to redirect
     """
     records = list(SeqIO.parse(self.infile1, "fasta"))
     # Set the input once as a keyword argument...
     cmdline = PrankCommandline(prank_exe, d=self.infile1)
     # ...and once more as a property.
     cmdline.d = self.infile1
     cmdline.f = 17  # NEXUS format
     cmdline.set_parameter("dots", True)
     expected_cmd = _escape_filename(prank_exe) + " -d=Fasta/fa01 -f=17 -dots"
     self.assertEqual(str(cmdline), expected_cmd)
     # The repr must evaluate back to an equivalent command line.
     self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
     stdout, stderr = cmdline()
     self.assertTrue("Total time" in stdout)
     self.assertEqual(stderr, "")
     try:
         align = AlignIO.read("output.2.nex", "nexus")
         for original, aligned in zip(records, align):
             # Old versions of Prank reduced name to 9 chars
             same_id = original.id == aligned.id
             self.assertTrue(same_id or original.id[:9] == aligned.id)
             # infile1 has alignment gaps in it, so compare ungapped
             aligned_text = str(aligned.seq).replace("-", "")
             original_text = str(original.seq).replace("-", "")
             self.assertEqual(aligned_text, original_text)
     except NexusError:
         # See bug 3119,
         # Bio.Nexus can't parse output from prank v100701 (1 July 2010)
         pass
Beispiel #4
0
 def test_tblastn(self):
     """Run a pairwise TBLASTN search and check the report counts."""
     global exe_names
     tblastn_exe = exe_names["tblastn"]
     cline = Applications.NcbitblastnCommandline(
         tblastn_exe,
         query="GenBank/NC_005816.faa",
         subject="GenBank/NC_005816.fna",
         evalue="1e-6",
     )
     # The wrapper must render to exactly this command string.
     expected_cmd = "".join([
         _escape_filename(tblastn_exe),
         " -query GenBank/NC_005816.faa -evalue 1e-6",
         " -subject GenBank/NC_005816.fna",
     ])
     self.assertEqual(str(cline), expected_cmd)
     # Run the tool, capturing both output streams as text.
     use_shell = sys.platform != "win32"
     child = subprocess.Popen(
         str(cline),
         stdout=subprocess.PIPE,
         stderr=subprocess.PIPE,
         universal_newlines=True,
         shell=use_shell,
     )
     stdoutdata, stderrdata = child.communicate()
     return_code = child.returncode
     failure_note = "Got error code %i back from:\n%s" % (return_code, cline)
     self.assertEqual(return_code, 0, failure_note)
     self.assertEqual(10, stdoutdata.count("Query= "))
     # Every query is expected to report at least one hit.
     self.assertEqual(0, stdoutdata.count("***** No hits found *****"))
    def test_fasta_db_nucl(self):
        """Test makeblastdb wrapper with nucleotide database.

        Checks the rendered command line, runs makeblastdb, requires a zero
        exit status and then checks that each expected database file exists.
        """
        global exe_names
        cline = Applications.NcbimakeblastdbCommandline(
            exe_names["makeblastdb"],
            input_file="GenBank/NC_005816.fna",
            dbtype="nucl",
            hash_index=True,
            max_file_sz="20MB",
            parse_seqids=True,
            taxid=10)

        self.assertEqual(str(cline),
                         _escape_filename(exe_names["makeblastdb"]) +
                         " -dbtype nucl -in GenBank/NC_005816.fna"
                         " -parse_seqids -hash_index -max_file_sz 20MB"
                         " -taxid 10")

        child = subprocess.Popen(str(cline),
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 universal_newlines=True,
                                 shell=(sys.platform != "win32"))
        stdoutdata, stderrdata = child.communicate()
        return_code = child.returncode
        # Check the exit status (as the BLAST search tests do) so a failed
        # run is reported with its command instead of as a missing file.
        self.assertEqual(return_code, 0, "Got error code %i back from:\n%s"
                         % (return_code, cline))

        for extension in ("nhd", "nhi", "nhr", "nin",
                          "nog", "nsd", "nsi", "nsq"):
            db_file = "GenBank/NC_005816.fna." + extension
            self.assertTrue(os.path.isfile(db_file),
                            "Missing database file " + db_file)
Beispiel #6
0
 def _as_list(self):
     """Return the argument as a list of command line tokens.

     An unset value (None) gives an empty list; a filename is passed
     through ``_escape_filename``; any other value is returned unchanged
     as the single entry of the list.
     """
     value = self.value
     if value is None:
         return []
     return [_escape_filename(value) if self.is_filename else value]
 def test_blastp(self):
     """Run a pairwise BLASTP search and sanity-check the report.

     Builds the command line wrapper, confirms its string form, runs it
     through subprocess and counts marker strings in the captured stdout.
     """
     global exe_names
     blastp_exe = exe_names["blastp"]
     cline = Applications.NcbiblastpCommandline(
         blastp_exe,
         query="Fasta/rose.pro",
         subject="GenBank/NC_005816.faa",
         evalue=1,
     )
     # The wrapper must render to exactly this command string.
     expected_cmd = (
         _escape_filename(blastp_exe)
         + " -query Fasta/rose.pro -evalue 1"
         + " -subject GenBank/NC_005816.faa"
     )
     self.assertEqual(str(cline), expected_cmd)
     # Run the tool, capturing both output streams as text.
     use_shell = sys.platform != "win32"
     child = subprocess.Popen(
         str(cline),
         stdout=subprocess.PIPE,
         stderr=subprocess.PIPE,
         universal_newlines=True,
         shell=use_shell,
     )
     stdoutdata, stderrdata = child.communicate()
     return_code = child.returncode
     failure_note = "Got error code %i back from:\n%s" % (return_code, cline)
     self.assertEqual(return_code, 0, failure_note)
     self.assertEqual(10, stdoutdata.count("Query= "))
     no_hits = stdoutdata.count("***** No hits found *****")
     # A count of 7 happens with BLAST 2.2.26+ (potentially a bug there);
     # any other count is required to be 9.
     if no_hits != 7:
         self.assertEqual(9, no_hits)
 def test_using_stdin(self):
     """Align sequences that are fed to MUSCLE over standard input."""
     input_file = "Fasta/f002"
     self.assertTrue(os.path.isfile(input_file))
     records = list(SeqIO.parse(input_file, "fasta"))
     # Ask for Clustal output (with a MUSCLE header); the sequences are
     # written to the child's stdin below rather than named on the command.
     cline = MuscleCommandline(muscle_exe, clw=True)
     expected_cmd = _escape_filename(muscle_exe) + " -clw"
     self.assertEqual(str(cline).rstrip(), expected_cmd)
     # The repr must evaluate back to an equivalent command line.
     self.assertEqual(str(eval(repr(cline))), str(cline))
     use_shell = sys.platform != "win32"
     proc = subprocess.Popen(
         str(cline),
         stdin=subprocess.PIPE,
         stdout=subprocess.PIPE,
         stderr=subprocess.PIPE,
         universal_newlines=True,
         shell=use_shell,
     )
     SeqIO.write(records, proc.stdin, "fasta")
     proc.stdin.close()
     # With stdin closed the alignment runs; parse it straight from stdout.
     align = AlignIO.read(proc.stdout, "clustal")
     align.sort()
     records.sort(key=lambda record: record.id)
     self.assertEqual(len(records), len(align))
     for original, aligned in zip(records, align):
         self.assertEqual(original.id, aligned.id)
         ungapped = str(aligned.seq).replace("-", "")
         self.assertEqual(ungapped, str(original.seq))
     self.assertEqual(0, proc.wait())
     proc.stdout.close()
     proc.stderr.close()
     del proc
Beispiel #9
0
    def test_fasta_db_nucl(self):
        """Test makeblastdb wrapper with nucleotide database.

        Checks the rendered command line, runs makeblastdb, requires a zero
        exit status and then checks that each expected database file exists.
        """
        global exe_names
        cline = Applications.NcbimakeblastdbCommandline(
            exe_names["makeblastdb"],
            input_file="GenBank/NC_005816.fna",
            dbtype="nucl",
            hash_index=True,
            max_file_sz="20MB",
            parse_seqids=True,
            taxid=10)

        self.assertEqual(str(cline),
                         _escape_filename(exe_names["makeblastdb"]) +
                         " -dbtype nucl -in GenBank/NC_005816.fna"
                         " -parse_seqids -hash_index -max_file_sz 20MB"
                         " -taxid 10")

        child = subprocess.Popen(str(cline),
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 universal_newlines=True,
                                 shell=(sys.platform != "win32"))
        stdoutdata, stderrdata = child.communicate()
        return_code = child.returncode
        # Check the exit status (as the BLAST search tests do) so a failed
        # run is reported with its command instead of as a missing file.
        self.assertEqual(return_code, 0, "Got error code %i back from:\n%s"
                         % (return_code, cline))

        for extension in ("nhd", "nhi", "nhr", "nin",
                          "nog", "nsd", "nsi", "nsq"):
            db_file = "GenBank/NC_005816.fna." + extension
            self.assertTrue(os.path.isfile(db_file),
                            "Missing database file " + db_file)
Beispiel #10
0
 def test_Prank_simple_with_NEXUS_output(self):
     """Round-trip the input file through PRANK with NEXUS output.

     output.?.??? files written to cwd - no way to redirect
     """
     records = list(SeqIO.parse(self.infile1, "fasta"))
     # Set the input once as a keyword argument...
     cmdline = PrankCommandline(prank_exe, d=self.infile1, noxml=True)
     # ...and once more as a property.
     cmdline.d = self.infile1
     cmdline.f = 17  # NEXUS format
     cmdline.set_parameter("notree", True)
     expected_cmd = (_escape_filename(prank_exe)
                     + " -d=Fasta/fa01 -f=17 -noxml -notree")
     self.assertEqual(str(cmdline), expected_cmd)
     # The repr must evaluate back to an equivalent command line.
     self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
     stdout, stderr = cmdline()
     self.assertTrue("Total time" in stdout)
     self.assertEqual(stderr, "")
     try:
         align = AlignIO.read("output.2.nex", "nexus")
         for original, aligned in zip(records, align):
             # Old versions of Prank reduced name to 9 chars
             same_id = original.id == aligned.id
             self.assertTrue(same_id or original.id[:9] == aligned.id)
             # infile1 has alignment gaps in it, so compare ungapped
             aligned_text = str(aligned.seq).replace("-", "")
             original_text = str(original.seq).replace("-", "")
             self.assertEqual(aligned_text, original_text)
     except NexusError:
         # See bug 3119,
         # Bio.Nexus can't parse output from prank v100701 (1 July 2010)
         pass
Beispiel #11
0
 def conversion(self, prank_number, prank_ext, format):
     """Have PRANK convert the input file, then verify the result.

     ``prank_number`` is passed to PRANK as its ``-f`` value, ``prank_ext``
     is the extension of the file PRANK is expected to write, and ``format``
     is the format name used to read that file back with AlignIO.
     """
     filename = f"{self.output}.{prank_ext}"
     # Start from a clean slate so a stale file cannot satisfy the test.
     if os.path.isfile(filename):
         os.remove(filename)
     quoted_output = f'"{self.output}"'
     cmdline = PrankCommandline(
         prank_exe,
         d=self.input,
         convert=True,
         f=prank_number,
         o=quoted_output,
     )
     expected_cmd = (
         _escape_filename(prank_exe)
         + f' -d={self.input} -o="{self.output}" -f={prank_number} -convert'
     )
     self.assertEqual(str(cmdline), expected_cmd)
     # The repr must evaluate back to an equivalent command line.
     self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
     message, error = cmdline()
     self.assertIn("PRANK", message)
     expected_note = f"converting '{self.input}' to '{filename}'"
     self.assertIn(expected_note, message, message)
     self.assertEqual(error, "")
     self.assertTrue(os.path.isfile(filename))
     old = AlignIO.read(self.input, "fasta")
     # Hack: shorten the expected ids to 10 characters for phylip
     if format == "phylip":
         for record in old:
             record.id = record.id[:10]
     new = AlignIO.read(filename, format)
     self.assertEqual(len(old), len(new))
     for before, after in zip(old, new):
         self.assertEqual(before.id, after.id)
         self.assertEqual(before.seq, after.seq)
     os.remove(filename)
Beispiel #12
0
 def conversion(self, prank_number, prank_ext, format):
     """Have PRANK convert the input file, then verify the result.

     ``prank_number`` is passed to PRANK as its ``-f`` value, ``prank_ext``
     is the extension of the file PRANK is expected to write, and ``format``
     is the format name used to read that file back with AlignIO.
     """
     filename = "%s.%s" % (self.output, prank_ext)
     # Start from a clean slate so a stale file cannot satisfy the test.
     if os.path.isfile(filename):
         os.remove(filename)
     quoted_output = '"%s"' % self.output
     cmdline = PrankCommandline(
         prank_exe,
         d=self.input,
         convert=True,
         f=prank_number,
         o=quoted_output,
     )
     expected_cmd = "".join([
         _escape_filename(prank_exe),
         " -d=%s" % self.input,
         ' -o="%s"' % self.output,
         " -f=%i" % prank_number,
         " -convert",
     ])
     self.assertEqual(str(cmdline), expected_cmd)
     # The repr must evaluate back to an equivalent command line.
     self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
     message, error = cmdline()
     self.assertIn("PRANK", message)
     expected_note = "converting '%s' to '%s'" % (self.input, filename)
     self.assertTrue(expected_note in message, message)
     self.assertEqual(error, "")
     self.assertTrue(os.path.isfile(filename))
     old = AlignIO.read(self.input, "fasta")
     # Hack: shorten the expected ids to 10 characters for phylip
     if format == "phylip":
         for record in old:
             record.id = record.id[:10]
     new = AlignIO.read(filename, format)
     self.assertEqual(len(old), len(new))
     for before, after in zip(old, new):
         self.assertEqual(before.id, after.id)
         self.assertEqual(str(before.seq), str(after.seq))
     os.remove(filename)
Beispiel #13
0
 def test_using_stdin(self):
     """Align sequences that are fed to MUSCLE over standard input."""
     input_file = "Fasta/f002"
     self.assertTrue(os.path.isfile(input_file))
     records = list(SeqIO.parse(input_file, "fasta"))
     # Ask for Clustal output (with a MUSCLE header); the sequences are
     # written to the child's stdin below rather than named on the command.
     cline = MuscleCommandline(muscle_exe, clw=True)
     expected_cmd = _escape_filename(muscle_exe) + " -clw"
     self.assertEqual(str(cline).rstrip(), expected_cmd)
     # The repr must evaluate back to an equivalent command line.
     self.assertEqual(str(eval(repr(cline))), str(cline))
     use_shell = sys.platform != "win32"
     proc = subprocess.Popen(
         str(cline),
         stdin=subprocess.PIPE,
         stdout=subprocess.PIPE,
         stderr=subprocess.PIPE,
         universal_newlines=True,
         shell=use_shell,
     )
     SeqIO.write(records, proc.stdin, "fasta")
     proc.stdin.close()
     # With stdin closed the alignment runs; parse it straight from stdout.
     align = AlignIO.read(proc.stdout, "clustal")
     align.sort()
     records.sort(key=lambda record: record.id)
     self.assertEqual(len(records), len(align))
     for original, aligned in zip(records, align):
         self.assertEqual(original.id, aligned.id)
         ungapped = str(aligned.seq).replace("-", "")
         self.assertEqual(ungapped, str(original.seq))
     self.assertEqual(0, proc.wait())
     proc.stdout.close()
     proc.stderr.close()
     del proc
Beispiel #14
0
 def test_simple_clustal_strict(self):
     """Run MUSCLE on a FASTA file asking for strict Clustal output."""
     input_file = "Fasta/f002"
     self.assertTrue(os.path.isfile(input_file))
     records = list(SeqIO.parse(input_file, "fasta"))
     records.sort(key=lambda record: record.id)
     # Build the command through set_parameter rather than keywords.
     cmdline = MuscleCommandline(muscle_exe)
     cmdline.set_parameter("in", input_file)
     # Use clustal output (with a CLUSTAL header)
     cmdline.set_parameter("clwstrict", True)  # Default None treated as False!
     expected_cmd = _escape_filename(muscle_exe) + " -in Fasta/f002 -clwstrict"
     self.assertEqual(str(cmdline).rstrip(), expected_cmd)
     # The repr must evaluate back to an equivalent command line.
     self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
     use_shell = sys.platform != "win32"
     proc = subprocess.Popen(
         str(cmdline),
         stdout=subprocess.PIPE,
         stderr=subprocess.PIPE,
         universal_newlines=True,
         shell=use_shell,
     )
     align = AlignIO.read(proc.stdout, "clustal")
     align.sort()
     # Didn't use -quiet so there should be progress reports on stderr,
     progress = proc.stderr.read().strip()
     self.assertTrue(progress.startswith("MUSCLE"))
     self.assertEqual(len(records), len(align))
     for original, aligned in zip(records, align):
         self.assertEqual(original.id, aligned.id)
         ungapped = str(aligned.seq).replace("-", "")
         self.assertEqual(ungapped, str(original.seq))
     return_code = proc.wait()
     self.assertEqual(return_code, 0)
     proc.stdout.close()
     proc.stderr.close()
     del proc
Beispiel #15
0
def get_emboss_version():
    """Return the EMBOSS version as a tuple of three ints, e.g. (6, 1, 0).

    Runs the ``embossversion`` executable with stderr merged into stdout and
    scans the output line by line. Blank lines and the known banner lines
    are skipped; the first line containing two or three dots is parsed as
    the version (a four part version such as 6.2.0.1 is cut to three parts).

    Raises:
        MissingExternalDependencyError: if a line cannot be understood as a
            version (including a dotted line that is not numeric), or if
            there was no usable output at all.
    """
    # Windows and Unix versions of EMBOSS seem to differ in
    # which lines go to stdout and stderr - so merge them.
    child = subprocess.Popen(_escape_filename(exes["embossversion"]),
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT,
                             universal_newlines=True,
                             shell=(sys.platform != "win32"))
    stdout, stderr = child.communicate()
    child.stdout.close()  # This is both stdout and stderr
    del child
    assert stderr is None  # Send to stdout instead
    for line in stdout.split("\n"):
        text = line.strip()
        if not text:
            # Skip blank lines (splitting always yields at least one entry,
            # even for empty output) so the final error below is reachable.
            continue
        if text == "Reports the current EMBOSS version number":
            continue
        if line.startswith("Writes the current EMBOSS version number"):
            continue
        if line.count(".") in (2, 3):
            # e.g. mEMBOSS-6.2.0.1-setup.exe reports 6.2.0.1 - for this
            # return (6, 2, 0). Three part versions are unaffected by [:3].
            try:
                return tuple(int(v) for v in text.split("."))[:3]
            except ValueError:
                # Dotted text that is not a version (e.g. an error message);
                # report it as a missing dependency below.
                pass
        # Either we can't understand the output, or this is really
        # an error message not caught earlier (e.g. not in English)
        raise MissingExternalDependencyError(
            "Install EMBOSS if you want to use Bio.Emboss (%s)." % line)
    # In case there was no output at all...
    raise MissingExternalDependencyError("Could not get EMBOSS version")
Beispiel #16
0
 def conversion(self, prank_number, prank_ext, format):
     """Have PRANK convert the input file, then verify the result.

     ``prank_number`` is passed to PRANK as its ``-f`` value, ``prank_ext``
     is the extension of the file PRANK is expected to write, and ``format``
     is the format name used to read that file back with AlignIO.
     """
     filename = "%s.%s" % (self.output, prank_ext)
     # Start from a clean slate so a stale file cannot satisfy the test.
     if os.path.isfile(filename):
         os.remove(filename)
     quoted_output = '"%s"' % self.output
     cmdline = PrankCommandline(
         prank_exe,
         d=self.input,
         convert=True,
         f=prank_number,
         o=quoted_output,
     )
     expected_cmd = "".join([
         _escape_filename(prank_exe),
         ' -d=%s' % self.input,
         ' -o="%s"' % self.output,
         ' -f=%i' % prank_number,
         ' -convert',
     ])
     self.assertEqual(str(cmdline), expected_cmd)
     # The repr must evaluate back to an equivalent command line.
     self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
     message, error = cmdline()
     self.assertTrue("PRANK" in message, message)
     expected_note = "converting '%s' to '%s'" % (self.input, filename)
     self.assertTrue(expected_note in message, message)
     self.assertEqual(error, "")
     self.assertTrue(os.path.isfile(filename))
     old = AlignIO.read(self.input, "fasta")
     # Hack: shorten the expected ids to 10 characters for phylip
     if format == "phylip":
         for record in old:
             record.id = record.id[:10]
     new = AlignIO.read(filename, format)
     self.assertEqual(len(old), len(new))
     for before, after in zip(old, new):
         self.assertEqual(before.id, after.id)
         self.assertEqual(str(before.seq), str(after.seq))
     os.remove(filename)
Beispiel #17
0
 def test_Prank_simple_with_NEXUS_output(self):
     """Round-trip the input file through PRANK with NEXUS output.

     output.?.??? files written to cwd - no way to redirect
     """
     records = list(SeqIO.parse(self.infile1, "fasta"))
     # Set the input once as a keyword argument...
     cmdline = PrankCommandline(prank_exe, d=self.infile1)
     # ...and once more as a property.
     cmdline.d = self.infile1
     cmdline.f = 17  # NEXUS format
     cmdline.set_parameter("dots", True)
     expected_cmd = _escape_filename(prank_exe) + " -d=Fasta/fa01 -f=17 -dots"
     self.assertEqual(str(cmdline), expected_cmd)
     # The repr must evaluate back to an equivalent command line.
     self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
     stdout, stderr = cmdline()
     self.assertTrue("Total time" in stdout)
     self.assertEqual(stderr, "")
     try:
         # Prank v.130820 and perhaps earlier use ".best.*" output names,
         # older Prank versions use ".2.*"; prefer the newer name.
         for candidate in ("output.best.nex", "output.2.nex"):
             if os.path.isfile(candidate):
                 nex_fname = candidate
                 break
         else:
             raise RuntimeError("Can't find PRANK's NEXUS output (*.nex)")
         align = AlignIO.read(nex_fname, "nexus")
         for original, aligned in zip(records, align):
             # Old versions of Prank reduced name to 9 chars
             same_id = original.id == aligned.id
             self.assertTrue(same_id or original.id[:9] == aligned.id)
             # infile1 has alignment gaps in it, so compare ungapped
             aligned_text = str(aligned.seq).replace("-", "")
             original_text = str(original.seq).replace("-", "")
             self.assertEqual(aligned_text, original_text)
     except NexusError:
         # See bug 3119,
         # Bio.Nexus can't parse output from prank v100701 (1 July 2010)
         pass
 def test_simple_clustal(self):
     """Run MUSCLE on a FASTA file asking for Clustal output (MUSCLE header)."""
     input_file = "Fasta/f002"
     self.assertTrue(os.path.isfile(input_file))
     records = list(SeqIO.parse(input_file, "fasta"))
     records.sort(key=lambda record: record.id)
     # Build the command with keywords; clw selects Clustal output.
     cmdline = MuscleCommandline(muscle_exe, input=input_file, clw=True)
     expected_cmd = _escape_filename(muscle_exe) + " -in Fasta/f002 -clw"
     self.assertEqual(str(cmdline).rstrip(), expected_cmd)
     # The repr must evaluate back to an equivalent command line.
     self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
     use_shell = sys.platform != "win32"
     proc = subprocess.Popen(
         str(cmdline),
         stdout=subprocess.PIPE,
         stderr=subprocess.PIPE,
         universal_newlines=True,
         shell=use_shell,
     )
     align = AlignIO.read(proc.stdout, "clustal")
     align.sort()  # by record.id
     # Didn't use -quiet so there should be progress reports on stderr,
     progress = proc.stderr.read().strip()
     self.assertTrue(progress.startswith("MUSCLE"))
     return_code = proc.wait()
     self.assertEqual(return_code, 0)
     proc.stdout.close()
     proc.stderr.close()
     del proc
     # Compare the parsed alignment against the input records.
     self.assertEqual(len(records), len(align))
     for original, aligned in zip(records, align):
         self.assertEqual(original.id, aligned.id)
         ungapped = str(aligned.seq).replace("-", "")
         self.assertEqual(ungapped, str(original.seq))
Beispiel #19
0
 def _as_list(self):
     """Return the list-valued argument as a flat list of tokens.

     The value must be a non-empty list. Filenames are each passed through
     ``_escape_filename``; other values are returned as a shallow copy of
     the list so that every entry is its own token.
     """
     assert isinstance(self.value, list), \
             "Arguments should be a list"
     assert self.value, "Requires at least one filename"
     if self.is_filename:
         return [_escape_filename(v) for v in self.value]
     # Return the entries themselves rather than the list wrapped inside
     # another list, matching the flat shape of the filename branch above.
     return list(self.value)
Beispiel #20
0
 def _as_list(self):
     """Build the command line as a list of tokens.

     Validates first, then starts with the escaped program name and appends
     the tokens of every parameter that is set, in parameter order.
     """
     self._validate()
     tokens = [_escape_filename(self.program_name)]
     for param in self.parameters:
         if not param.is_set:
             continue
         tokens.extend(param._as_list())
     return tokens
Beispiel #21
0
 def test_with_multiple_output_formats(self):
     """Simple muscle call with multiple output formats.

     Requests Clustal output on stdout plus an HTML file and a strict
     Clustal file, then checks all three and removes the two files.
     """
     input_file = "Fasta/f002"
     output_html = "temp_f002.html"
     output_clwstrict = "temp_f002.clw"
     self.assertTrue(os.path.isfile(input_file))
     records = list(SeqIO.parse(input_file, "fasta"))
     records.sort(key=lambda rec: rec.id)
     # Prepare the command... use Clustal output (with a MUSCLE header)
     cmdline = MuscleCommandline(
         muscle_exe,
         input=input_file,
         clw=True,
         htmlout=output_html,
         clwstrictout=output_clwstrict,
     )
     self.assertEqual(
         str(cmdline).rstrip(),
         _escape_filename(muscle_exe) +
         " -in Fasta/f002 -clw -htmlout temp_f002.html" +
         " -clwstrictout temp_f002.clw",
     )
     self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
     child = subprocess.Popen(
         str(cmdline),
         stdout=subprocess.PIPE,
         stderr=subprocess.PIPE,
         universal_newlines=True,
         shell=(sys.platform != "win32"),
     )
     # Clustalw on stdout:
     align = AlignIO.read(child.stdout, "clustal")
     align.sort()
     # Didn't use -quiet so there should be progress reports on stderr,
     self.assertTrue(child.stderr.read().strip().startswith("MUSCLE"))
     return_code = child.wait()
     self.assertEqual(return_code, 0)
     self.assertEqual(len(records), len(align))
     for old, new in zip(records, align):
         self.assertEqual(old.id, new.id)
     child.stdout.close()
     child.stderr.close()
     del child
     # HTML output: the context manager closes the handle even if the
     # read fails.
     with open(output_html) as handle:
         html = handle.read().strip().upper()
     self.assertTrue(html.startswith("<HTML"))
     self.assertTrue(html.endswith("</HTML>"))
     # ClustalW strict:
     align = AlignIO.read(output_clwstrict, "clustal")
     align.sort()
     self.assertEqual(len(records), len(align))
     for old, new in zip(records, align):
         self.assertEqual(old.id, new.id)
     os.remove(output_html)
     os.remove(output_clwstrict)
Beispiel #22
0
 def test_Muscle_simple(self):
     """Round-trip through MUSCLE with only an input and an output file."""
     cmdline = MuscleCommandline(
         muscle_exe,
         input=self.infile1,
         out=self.outfile1,
     )
     # The output filename contains spaces, so it is expected in quotes.
     expected_cmd = (_escape_filename(muscle_exe)
                     + ' -in Fasta/f002 -out "Fasta/temp align out1.fa"')
     self.assertEqual(str(cmdline), expected_cmd)
     # The repr must evaluate back to an equivalent command line.
     self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
     output, error = cmdline()
     self.assertEqual(output, "")
     self.assertTrue("ERROR" not in error)
Beispiel #23
0
 def test_Prank_simple(self):
     """Round-trip through PRANK given only an input file.

     output.?.??? files written to cwd - no way to redirect
     """
     cmdline = PrankCommandline(prank_exe)
     cmdline.set_parameter("d", self.infile1)
     expected_cmd = _escape_filename(prank_exe) + " -d=Fasta/fa01"
     self.assertEqual(str(cmdline), expected_cmd)
     # The repr must evaluate back to an equivalent command line.
     self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
     output, error = cmdline()
     self.assertEqual(error, "")
     self.assertTrue("Total time" in output)
Beispiel #24
0
 def _as_list(self):
     """Return the option as a list of command line tokens.

     With no value only the first name is returned. Otherwise the value is
     escaped (for filenames) or converted with ``str``, and either joined
     to the name with "=" (when ``equate`` is true) or returned as a
     separate token after the name.
     """
     flag = self.names[0]
     if self.value is None:
         return [flag]
     if self.is_filename:
         rendered = _escape_filename(self.value)
     else:
         rendered = str(self.value)
     if not self.equate:
         return [flag, rendered]
     return ["%s=%s" % (flag, rendered)]
 def test_Muscle_simple(self):
     """Round-trip through MUSCLE with only an input and an output file."""
     cmdline = MuscleCommandline(
         muscle_exe,
         input=self.infile1,
         out=self.outfile1,
     )
     # The output filename contains spaces, so it is expected in quotes.
     expected_cmd = (_escape_filename(muscle_exe)
                     + ' -in Fasta/f002 -out "Fasta/temp align out1.fa"')
     self.assertEqual(str(cmdline), expected_cmd)
     # The repr must evaluate back to an equivalent command line.
     self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
     output, error = cmdline()
     self.assertEqual(output, "")
     self.assertTrue("ERROR" not in error)
Beispiel #26
0
 def test_Prank_simple(self):
     """Round-trip through PRANK given only an input file.

     output.?.??? files written to cwd - no way to redirect
     """
     cmdline = PrankCommandline(prank_exe)
     cmdline.set_parameter("d", self.infile1)
     expected_cmd = _escape_filename(prank_exe) + " -d=Fasta/fa01"
     self.assertEqual(str(cmdline), expected_cmd)
     # The repr must evaluate back to an equivalent command line.
     self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
     output, error = cmdline()
     self.assertEqual(error, "")
     self.assertTrue("Total time" in output)
 def test_Muscle_profile_with_options(self):
     """Profile alignment, and switch and valued options"""
     #Using some keyword arguments, note -stable isn't supported in v3.8
     cmdline = MuscleCommandline(muscle_exe, out=self.outfile4,
                                 in1=self.infile2, in2=self.infile3,
                                 profile=True, stable=True,
                                 cluster1="neighborjoining")
     self.assertEqual(str(cmdline), _escape_filename(muscle_exe) +
                      " -out Fasta/temp_align_out4.fa" +
                      " -profile -in1 Fasta/fa01 -in2 Fasta/f001" +
                      " -cluster1 neighborjoining -stable")
     self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
     """
Beispiel #28
0
 def test_Muscle_profile_with_options(self):
     """Profile alignment, and switch and valued options"""
     #Using some keyword arguments, note -stable isn't supported in v3.8
     cmdline = MuscleCommandline(muscle_exe, out=self.outfile4,
                                 in1=self.infile2, in2=self.infile3,
                                 profile=True, stable=True,
                                 cluster1="neighborjoining")
     self.assertEqual(str(cmdline), _escape_filename(muscle_exe) +
                      " -out Fasta/temp_align_out4.fa" +
                      " -profile -in1 Fasta/fa01 -in2 Fasta/f001" +
                      " -cluster1 neighborjoining -stable")
     self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
     """
Beispiel #29
0
 def test_Muscle_profile_simple(self):
     """Simple round-trip through app doing a profile alignment.

     Sets the output file, the profile switch and both input profiles via
     set_parameter, checks the rendered command line and runs it.
     """
     cmdline = MuscleCommandline(muscle_exe)
     cmdline.set_parameter("out", self.outfile3)
     cmdline.set_parameter("profile", True)
     cmdline.set_parameter("in1", self.infile2)
     cmdline.set_parameter("in2", self.infile3)
     self.assertEqual(str(cmdline), _escape_filename(muscle_exe) +
                      " -out Fasta/temp_align_out3.fa" +
                      " -profile -in1 Fasta/fa01 -in2 Fasta/f001")
     # The repr must evaluate back to an equivalent command line.
     self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
     output, error = cmdline()
     self.assertEqual(output, "")
     self.assertNotIn("ERROR", error)
     # Show the stderr text on failure; stdout was just asserted to be
     # empty, so it would make a useless failure message.
     self.assertTrue(error.strip().startswith("MUSCLE"), error)
 def test_Muscle_profile_simple(self):
     """Simple round-trip through app doing a profile alignment.

     Sets the output file, the profile switch and both input profiles via
     set_parameter, checks the rendered command line and runs it.
     """
     cmdline = MuscleCommandline(muscle_exe)
     cmdline.set_parameter("out", self.outfile3)
     cmdline.set_parameter("profile", True)
     cmdline.set_parameter("in1", self.infile2)
     cmdline.set_parameter("in2", self.infile3)
     self.assertEqual(str(cmdline), _escape_filename(muscle_exe) +
                      " -out Fasta/temp_align_out3.fa" +
                      " -profile -in1 Fasta/fa01 -in2 Fasta/f001")
     # The repr must evaluate back to an equivalent command line.
     self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
     output, error = cmdline()
     self.assertEqual(output, "")
     self.assertNotIn("ERROR", error)
     # Show the stderr text on failure; stdout was just asserted to be
     # empty, so it would make a useless failure message.
     self.assertTrue(error.strip().startswith("MUSCLE"), error)
Beispiel #31
0
 def test_long(self):
     """Simple muscle call using long file.

     Writes the first 40 records of ``NBRF/Cw_prot.pir`` to a temporary
     FASTA file, aligns them with MUSCLE (Clustal output on stdout, quiet
     mode), and checks that every input record comes back with the same
     id and the same ungapped sequence.

     The temporary file and the child's pipes are released in ``finally``
     blocks so that a failing assertion does not leave them behind.
     """
     # Create a large input file by converting some of another example file
     temp_large_fasta_file = "temp_cw_prot.fasta"
     records = list(SeqIO.parse("NBRF/Cw_prot.pir", "pir"))[:40]
     try:
         SeqIO.write(records, temp_large_fasta_file, "fasta")
         # Prepare the command...
         cmdline = MuscleCommandline(muscle_exe)
         cmdline.set_parameter("in", temp_large_fasta_file)
         # Use fast options
         cmdline.set_parameter("maxiters", 1)
         cmdline.set_parameter("diags", True)  # Default None treated as False!
         # Use clustal output
         cmdline.set_parameter("clwstrict",
                               True)  # Default None treated as False!
         # Shouldn't need this, but just to make sure it is accepted
         cmdline.set_parameter("maxhours", 0.1)
         # No progress reports to stderr
         cmdline.set_parameter("quiet", True)  # Default None treated as False!
         self.assertEqual(
             str(cmdline).rstrip(),
             _escape_filename(muscle_exe) +
             " -in temp_cw_prot.fasta -diags -maxhours 0.1" +
             " -maxiters 1 -clwstrict -quiet",
         )
         self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
         child = subprocess.Popen(
             str(cmdline),
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE,
             universal_newlines=True,
             shell=(sys.platform != "win32"),
         )
         try:
             # NOTE(review): stdout is consumed before stderr; this relies
             # on -quiet keeping stderr small enough not to fill its pipe.
             align = AlignIO.read(child.stdout, "clustal")
             align.sort()
             records.sort(key=lambda rec: rec.id)
             self.assertEqual(len(records), len(align))
             for old, new in zip(records, align):
                 self.assertEqual(old.id, new.id)
                 self.assertEqual(str(new.seq).replace("-", ""), str(old.seq))
             # See if quiet worked:
             self.assertEqual("", child.stderr.read().strip())
             return_code = child.wait()
             self.assertEqual(return_code, 0)
         finally:
             child.stdout.close()
             child.stderr.close()
             # Reap the process even when an assertion above failed.
             child.wait()
     finally:
         # Guarded: the file may not exist if writing it failed.
         if os.path.isfile(temp_large_fasta_file):
             os.remove(temp_large_fasta_file)
Beispiel #32
0
 def test_with_multiple_output_formats(self):
     """Simple muscle call with multiple output formats.

     Runs MUSCLE once on ``Fasta/f002`` asking for Clustal output on
     stdout plus an HTML file and a strict-Clustal file, then checks each
     of the three outputs.

     The HTML file is opened in plain text mode: the ``"U"`` mode flag is
     rejected by ``open()`` on Python 3.11 and later. Pipes and temporary
     output files are released in ``finally`` blocks so that a failing
     assertion does not leave them behind.
     """
     input_file = "Fasta/f002"
     output_html = "temp_f002.html"
     output_clwstrict = "temp_f002.clw"
     self.assertTrue(os.path.isfile(input_file))
     records = list(SeqIO.parse(input_file, "fasta"))
     records.sort(key=lambda rec: rec.id)
     # Prepare the command... use Clustal output (with a MUSCLE header)
     cmdline = MuscleCommandline(muscle_exe, input=input_file,
                                 clw=True, htmlout=output_html,
                                 clwstrictout=output_clwstrict)
     self.assertEqual(str(cmdline).rstrip(), _escape_filename(muscle_exe) +
                      " -in Fasta/f002 -clw -htmlout temp_f002.html" +
                      " -clwstrictout temp_f002.clw")
     self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
     try:
         child = subprocess.Popen(str(cmdline),
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE,
                                  universal_newlines=True,
                                  shell=(sys.platform != "win32"))
         try:
             # Clustalw on stdout:
             align = AlignIO.read(child.stdout, "clustal")
             align.sort()
             # Didn't use -quiet so there should be progress reports on
             # stderr.
             # NOTE(review): stdout is consumed before stderr; this relies
             # on the progress output fitting in the stderr pipe buffer.
             self.assertTrue(
                 child.stderr.read().strip().startswith("MUSCLE"))
             return_code = child.wait()
             self.assertEqual(return_code, 0)
             self.assertEqual(len(records), len(align))
             for old, new in zip(records, align):
                 self.assertEqual(old.id, new.id)
         finally:
             child.stdout.close()
             child.stderr.close()
             # Reap the process even when an assertion above failed.
             child.wait()
         # HTML output file:
         with open(output_html) as handle:
             html = handle.read().strip().upper()
         self.assertTrue(html.startswith("<HTML"))
         self.assertTrue(html.endswith("</HTML>"))
         # ClustalW strict:
         align = AlignIO.read(output_clwstrict, "clustal")
         align.sort()
         self.assertEqual(len(records), len(align))
         for old, new in zip(records, align):
             self.assertEqual(old.id, new.id)
     finally:
         # Guarded: a file may be missing if MUSCLE failed to write it.
         for temp_file in (output_html, output_clwstrict):
             if os.path.isfile(temp_file):
                 os.remove(temp_file)
Beispiel #33
0
    def test_fasta_db_prot_legacy(self):
        """Test makeblastdb wrapper with protein database legacy, version 4.

        Checks the rendered command line, runs makeblastdb on
        ``GenBank/NC_005816.faa``, verifies that it exits with status 0,
        and then checks that the expected database files exist next to
        the input file. For two of the index files either of two
        alternative extensions is accepted.
        """
        cline = Applications.NcbimakeblastdbCommandline(
            exe_names["makeblastdb"],
            blastdb_version=4,
            input_file="GenBank/NC_005816.faa",
            dbtype="prot",
            hash_index=True,
            max_file_sz="20MB",
            parse_seqids=True,
            taxid=10,
        )

        self.assertEqual(
            str(cline),
            _escape_filename(exe_names["makeblastdb"]) + " -blastdb_version 4"
            " -dbtype prot -in GenBank/NC_005816.faa"
            " -parse_seqids -hash_index -max_file_sz 20MB"
            " -taxid 10",
        )

        child = subprocess.Popen(
            str(cline),
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True,
            shell=(sys.platform != "win32"),
        )
        stdoutdata, stderrdata = child.communicate()
        return_code = child.returncode
        # Fail on a non-zero exit and show stderr, rather than only
        # noticing later that some output file is missing.
        self.assertEqual(
            return_code,
            0,
            "Got error code %i back from:\n%s\n%s"
            % (return_code, cline, stderrdata),
        )

        db_prefix = "GenBank/NC_005816.faa"
        for extension in (".phd", ".phi", ".phr", ".pin", ".pog"):
            db_file = db_prefix + extension
            self.assertTrue(os.path.isfile(db_file), "Missing %s" % db_file)
        # Either extension of each pair is accepted for these two files.
        for first, second in ((".psd", ".pnd"), (".psi", ".pni")):
            self.assertTrue(
                os.path.isfile(db_prefix + first)
                or os.path.isfile(db_prefix + second),
                "Missing both %s%s and %s%s"
                % (db_prefix, first, db_prefix, second),
            )
        self.assertTrue(
            os.path.isfile(db_prefix + ".psq"),
            "Missing %s.psq" % db_prefix,
        )
 def test_Muscle_with_options(self):
     """Round-trip through app with a switch and valued option.

     Sets the input and output through ``set_parameter`` and two further
     options through attribute assignment, checks the rendered string and
     the ``repr`` round-trip, then runs the tool. The alignment goes to
     the ``-out`` file, so stdout is expected to be empty and stderr is
     expected to start with the text ``MUSCLE``.
     """
     cmdline = MuscleCommandline(muscle_exe)
     cmdline.set_parameter("input", self.infile1)  # "input" is alias for "in"
     cmdline.set_parameter("out", self.outfile2)
     # Use property:
     cmdline.objscore = "sp"
     cmdline.noanchors = True
     self.assertEqual(str(cmdline), _escape_filename(muscle_exe) +
                      " -in Fasta/f002" +
                      " -out Fasta/temp_align_out2.fa" +
                      " -objscore sp -noanchors")
     self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
     output, error = cmdline()
     self.assertEqual(output, "")
     # Report stderr on failure: stdout was just asserted to be empty, so
     # showing it would say nothing about what went wrong.
     self.assertTrue("ERROR" not in error, error)
     self.assertTrue(error.strip().startswith("MUSCLE"), error)
Beispiel #35
0
 def test_Muscle_with_options(self):
     """Round-trip through app with a switch and valued option.

     Sets the input and output through ``set_parameter`` and two further
     options through attribute assignment, checks the rendered string and
     the ``repr`` round-trip, then runs the tool. The alignment goes to
     the ``-out`` file, so stdout is expected to be empty and stderr is
     expected to start with the text ``MUSCLE``.
     """
     cmdline = MuscleCommandline(muscle_exe)
     cmdline.set_parameter("input", self.infile1)  # "input" is alias for "in"
     cmdline.set_parameter("out", self.outfile2)
     # Use property:
     cmdline.objscore = "sp"
     cmdline.noanchors = True
     self.assertEqual(str(cmdline), _escape_filename(muscle_exe) +
                      " -in Fasta/f002" +
                      " -out Fasta/temp_align_out2.fa" +
                      " -objscore sp -noanchors")
     self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
     output, error = cmdline()
     self.assertEqual(output, "")
     # Report stderr on failure: stdout was just asserted to be empty, so
     # showing it would say nothing about what went wrong.
     self.assertTrue("ERROR" not in error, error)
     self.assertTrue(error.strip().startswith("MUSCLE"), error)
Beispiel #36
0
 def test_Prank_complex_command_line(self):
     """Round-trip with complex command line.

     Options are supplied through ``set_parameter`` (both with and
     without a leading dash in the name) and through attribute
     assignment. The rendered string and the ``repr`` round-trip are
     checked, then the tool is run and its stdout is searched for
     ``Total time``.
     """
     cmdline = PrankCommandline(prank_exe)
     cmdline.set_parameter("d", self.infile1)
     cmdline.set_parameter("-gaprate", 0.321)
     cmdline.set_parameter("gapext", 0.6)
     cmdline.set_parameter("-dots", 1)  # i.e. True
     # Attribute-style assignment, with one more set_parameter in between:
     cmdline.kappa = 3
     cmdline.skipins = True
     cmdline.set_parameter("-once", True)
     cmdline.realbranches = True

     rendered = str(cmdline)
     expected = (_escape_filename(prank_exe)
                 + " -d=Fasta/fa01"
                 " -dots -gaprate=0.321 -gapext=0.6 -kappa=3"
                 " -once -skipins -realbranches")
     self.assertEqual(rendered, expected)

     rebuilt = eval(repr(cmdline))
     self.assertEqual(str(rebuilt), str(cmdline))

     out_text, err_text = cmdline()
     self.assertTrue("Total time" in out_text, out_text)
Beispiel #37
0
 def test_Prank_complex_command_line(self):
     """Round-trip with complex command line.

     Options are supplied through ``set_parameter`` (both with and
     without a leading dash in the name) and through attribute
     assignment. The rendered string and the ``repr`` round-trip are
     checked, then the tool is run and its stdout is searched for
     ``Total time``.
     """
     cmdline = PrankCommandline(prank_exe)
     cmdline.set_parameter("d", self.infile1)
     cmdline.set_parameter("-gaprate", 0.321)
     cmdline.set_parameter("gapext", 0.6)
     cmdline.set_parameter("-dots", 1)  # i.e. True
     # Attribute-style assignment, with one more set_parameter in between:
     cmdline.kappa = 3
     cmdline.skipins = True
     cmdline.set_parameter("-once", True)
     cmdline.realbranches = True

     rendered = str(cmdline)
     expected = (_escape_filename(prank_exe)
                 + " -d=Fasta/fa01"
                 " -dots -gaprate=0.321 -gapext=0.6 -kappa=3"
                 " -once -skipins -realbranches")
     self.assertEqual(rendered, expected)

     rebuilt = eval(repr(cmdline))
     self.assertEqual(str(rebuilt), str(cmdline))

     out_text, err_text = cmdline()
     self.assertTrue("Total time" in out_text, out_text)
Beispiel #38
0
 def test_long(self):
     """Simple muscle call using long file.

     Writes the first 40 records of ``NBRF/Cw_prot.pir`` to a temporary
     FASTA file, aligns them with MUSCLE (Clustal output on stdout, quiet
     mode), and checks that every input record comes back with the same
     id and the same ungapped sequence.

     The temporary file and the child's pipes are released in ``finally``
     blocks so that a failing assertion does not leave them behind.
     """
     # Create a large input file by converting some of another example file
     temp_large_fasta_file = "temp_cw_prot.fasta"
     records = list(SeqIO.parse("NBRF/Cw_prot.pir", "pir"))[:40]
     try:
         SeqIO.write(records, temp_large_fasta_file, "fasta")
         # Prepare the command...
         cmdline = MuscleCommandline(muscle_exe)
         cmdline.set_parameter("in", temp_large_fasta_file)
         # Use fast options
         cmdline.set_parameter("maxiters", 1)
         cmdline.set_parameter("diags", True)  # Default None treated as False!
         # Use clustal output
         cmdline.set_parameter("clwstrict", True)  # Default None treated as False!
         # Shouldn't need this, but just to make sure it is accepted
         cmdline.set_parameter("maxhours", 0.1)
         # No progress reports to stderr
         cmdline.set_parameter("quiet", True)  # Default None treated as False!
         self.assertEqual(str(cmdline).rstrip(), _escape_filename(muscle_exe) +
                          " -in temp_cw_prot.fasta -diags -maxhours 0.1" +
                          " -maxiters 1 -clwstrict -quiet")
         self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
         child = subprocess.Popen(str(cmdline),
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE,
                                  universal_newlines=True,
                                  shell=(sys.platform != "win32"))
         try:
             # NOTE(review): stdout is consumed before stderr; this relies
             # on -quiet keeping stderr small enough not to fill its pipe.
             align = AlignIO.read(child.stdout, "clustal")
             align.sort()
             records.sort(key=lambda rec: rec.id)
             self.assertEqual(len(records), len(align))
             for old, new in zip(records, align):
                 self.assertEqual(old.id, new.id)
                 self.assertEqual(str(new.seq).replace("-", ""), str(old.seq))
             # See if quiet worked:
             self.assertEqual("", child.stderr.read().strip())
             return_code = child.wait()
             self.assertEqual(return_code, 0)
         finally:
             child.stdout.close()
             child.stderr.close()
             # Reap the process even when an assertion above failed.
             child.wait()
     finally:
         # Guarded: the file may not exist if writing it failed.
         if os.path.isfile(temp_large_fasta_file):
             os.remove(temp_large_fasta_file)
 def test_tblastn(self):
     """Pairwise TBLASTN search.

     Checks the rendered command line, runs it, and expects a zero exit
     status, ten ``Query= `` sections on stdout and no
     ``***** No hits found *****`` lines.
     """
     global exe_names
     tblastn_exe = exe_names["tblastn"]
     cline = Applications.NcbitblastnCommandline(
         tblastn_exe,
         query="GenBank/NC_005816.faa",
         subject="GenBank/NC_005816.fna",
         evalue="1e-6")
     command = str(cline)
     expected = (_escape_filename(tblastn_exe)
                 + " -query GenBank/NC_005816.faa -evalue 1e-6"
                 + " -subject GenBank/NC_005816.fna")
     self.assertEqual(command, expected)

     use_shell = sys.platform != "win32"
     proc = subprocess.Popen(command,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             universal_newlines=True,
                             shell=use_shell)
     out_text, err_text = proc.communicate()
     status = proc.returncode
     failure = "Got error code %i back from:\n%s" % (status, cline)
     self.assertEqual(status, 0, failure)

     query_count = out_text.count("Query= ")
     no_hit_count = out_text.count("***** No hits found *****")
     self.assertEqual(10, query_count)
     self.assertEqual(0, no_hit_count)
Beispiel #40
0
    def __str__(self):
        """Write out the command line as a string.

        The result is the escaped executable, then ``-INFILE=<file>``,
        then one token for every option attribute that is set, all
        separated by single spaces.
        """
        # Notes on how clustalw accepts its input file argument.
        #
        # On Linux with clustalw 1.83, you can do:
        #   clustalw input.faa
        #   clustalw /full/path/input.faa
        #   clustalw -INFILE=input.faa
        #   clustalw -INFILE=/full/path/input.faa
        #
        # Note these fail (using DOS style slashes):
        #   clustalw /INFILE=input.faa
        #   clustalw /INFILE=/full/path/input.faa
        #
        # On Windows XP with clustalw.exe 1.83, these work at
        # the command prompt:
        #   clustalw.exe input.faa
        #   clustalw.exe /INFILE=input.faa
        #   clustalw.exe /INFILE="input.faa"
        #   clustalw.exe /INFILE="with space.faa"
        #   clustalw.exe /INFILE=C:\full\path\input.faa
        #   clustalw.exe /INFILE="C:\full path\with spaces.faa"
        #
        # Sadly these fail:
        #   clustalw.exe "input.faa"
        #   clustalw.exe "with space.faa"
        #   clustalw.exe C:\full\path\input.faa
        #   clustalw.exe "C:\full path\with spaces.faa"
        #
        # Testing today (using a different binary of clustalw.exe 1.83),
        # using -INFILE as follows seems to work.  However I had once noted:
        # These also fail but a minus/dash does seem to
        # work with other options (!):
        #   clustalw.exe -INFILE=input.faa
        #   clustalw.exe -INFILE=C:\full\path\input.faa
        #
        # Also these fail:
        #   clustalw.exe "/INFILE=input.faa"
        #   clustalw.exe "/INFILE=C:\full\path\input.faa"
        #
        # Thanks to Emanuel Hey for flagging this on the mailing list.
        #
        # In addition, both self.command and self.sequence_file
        # may contain spaces, so should be quoted. But clustalw
        # is fussy.
        tokens = [
            _escape_filename(self.command),
            "-INFILE=%s" % _escape_filename(self.sequence_file),
        ]
        add = tokens.append

        # general options
        if self.type:
            add("-TYPE=%s" % self.type)
        if self.is_quick == 1:
            # Some versions of clustalw are case sensitive,
            # and require -quicktree rather than -QUICKTREE
            add("-quicktree")
        if self.allow_negative == 1:
            add("-NEGATIVE")

        # output options
        if self.output_file:
            add("-OUTFILE=%s" % _escape_filename(self.output_file))
        if self.output_type:
            add("-OUTPUT=%s" % self.output_type)
        if self.output_order:
            add("-OUTORDER=%s" % self.output_order)
        if self.change_case:
            add("-CASE=%s" % self.change_case)
        if self.add_seqnos:
            add("-SEQNOS=%s" % self.add_seqnos)
        if self.new_tree:
            # clustal does not work if -align is written -ALIGN
            add("-NEWTREE=%s -align" % _escape_filename(self.new_tree))

        # multiple alignment options
        if self.guide_tree:
            add("-USETREE=%s" % _escape_filename(self.guide_tree))
        if self.protein_matrix:
            add("-MATRIX=%s" % self.protein_matrix)
        if self.dna_matrix:
            add("-DNAMATRIX=%s" % self.dna_matrix)
        # NOTE(review): the value options here are tested for truthiness,
        # so a value of 0 is left off the command line - confirm intended.
        if self.gap_open_pen:
            add("-GAPOPEN=%s" % self.gap_open_pen)
        if self.gap_ext_pen:
            add("-GAPEXT=%s" % self.gap_ext_pen)
        if self.is_no_end_pen == 1:
            add("-ENDGAPS")
        if self.gap_sep_range:
            add("-GAPDIST=%s" % self.gap_sep_range)
        if self.is_no_pgap == 1:
            add("-NOPGAP")
        if self.is_no_hgap == 1:
            add("-NOHGAP")
        if len(self.h_gap_residues) != 0:
            # stick the list of residues together as one big list o' residues
            add("-HGAPRESIDUES=%s" % "".join(self.h_gap_residues))
        if self.max_div:
            add("-MAXDIV=%s" % self.max_div)
        if self.trans_weight:
            add("-TRANSWEIGHT=%s" % self.trans_weight)

        return " ".join(tokens)