예제 #1
0
 def check_EMBOSS_to_AlignIO(self, filename, old_format, skip_formats=()):
     """Verify AlignIO can parse seqret's conversions of an alignment file.

     The file is first parsed directly with AlignIO as ``old_format``.
     seqret is then run once per target format (those named in
     ``skip_formats`` are left out), its standard output is parsed with
     AlignIO, and the result is handed to ``self.compare_alignments``
     together with the directly parsed alignments.
     """
     self.assertTrue(os.path.isfile(filename), filename)
     expected = list(AlignIO.parse(filename, old_format))
     # "fasta" and "nexus" are only attempted when the input holds exactly
     # one alignment.
     targets = ["clustal", "phylip", "ig", "msf"]
     if len(expected) == 1:
         targets += ["fasta", "nexus"]
     use_shell = sys.platform != "win32"
     for target in targets:
         if target not in skip_formats:
             command = str(
                 SeqretCommandline(
                     exes["seqret"],
                     sequence=filename,
                     sformat=old_format,
                     osformat=target,
                     auto=True,  # no prompting
                     stdout=True,
                 )
             )
             with subprocess.Popen(
                     command,
                     stdin=subprocess.PIPE,
                     stdout=subprocess.PIPE,
                     stderr=subprocess.PIPE,
                     universal_newlines=True,
                     shell=use_shell,
             ) as proc:
                 # Nothing is sent on stdin; the input is given by file name.
                 proc.stdin.close()
                 converted = list(AlignIO.parse(proc.stdout, target))
             self.compare_alignments(
                 expected,
                 converted,
                 "converting %s from %s to %s" % (filename, old_format,
                                                  target),
             )
예제 #2
0
def emboss_piped_SeqIO_convert(records, old_format, new_format):
    """Pipe records through EMBOSS seqret and return the converted records.

    The records are written in ``old_format`` to seqret's standard input
    and the tool's standard output is parsed as ``new_format``.

    Returns a list of the parsed records. The output is loaded in full
    (rather than returned as a lazy generator) so that every pipe can be
    closed before this function returns.
    """
    # Setup, this assumes for all the format names used
    # Biopython and EMBOSS names are consistent!
    cline = SeqretCommandline(
        exes["seqret"],
        sformat=old_format,
        osformat=new_format,
        auto=True,  # no prompting
        filter=True,
    )
    # Run the tool. Using Popen as a context manager closes all three pipes
    # and waits for the child on exit, even if writing or parsing raises.
    with subprocess.Popen(
        str(cline),
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
        shell=(sys.platform != "win32"),
    ) as child:
        SeqIO.write(records, child.stdin, old_format)
        # Closing stdin signals end of input to seqret.
        child.stdin.close()
        child.stderr.close()
        converted = list(SeqIO.parse(child.stdout, new_format))
    return converted
예제 #3
0
 def check_EMBOSS_to_SeqIO(self, filename, old_format, skip_formats=()):
     """Verify SeqIO can parse seqret's conversions of a sequence file.

     The file is first parsed directly with SeqIO as ``old_format``.
     seqret is then run once per target format (those named in
     ``skip_formats`` are left out), its standard output is parsed with
     SeqIO, and the result is handed to ``self.compare_records`` together
     with the directly parsed records.
     """
     # TODO: Why can't we read EMBOSS's swiss output?
     self.assertTrue(os.path.isfile(filename))
     expected = list(SeqIO.parse(filename, old_format))
     use_shell = sys.platform != "win32"
     for target in ("genbank", "fasta", "pir", "embl", "ig"):
         if target not in skip_formats:
             command = str(
                 SeqretCommandline(
                     exes["seqret"],
                     sequence=filename,
                     sformat=old_format,
                     osformat=target,
                     auto=True,  # no prompting
                     stdout=True,
                 )
             )
             with subprocess.Popen(
                     command,
                     stdin=subprocess.PIPE,
                     stdout=subprocess.PIPE,
                     stderr=subprocess.PIPE,
                     universal_newlines=True,
                     shell=use_shell,
             ) as proc:
                 # Nothing is sent on stdin; the input is given by file name.
                 proc.stdin.close()
                 converted = list(SeqIO.parse(proc.stdout, target))
             self.compare_records(
                 expected,
                 converted,
                 "converting %s from %s to %s" % (filename, old_format,
                                                  target),
             )
예제 #4
0
def emboss_piped_SeqIO_convert(records, old_format, new_format):
    """Pipe records through EMBOSS seqret, returning records lazily.

    The records are written in ``old_format`` to seqret's standard input.
    The return value is a generator which parses the tool's standard
    output as ``new_format`` and closes that pipe once parsing finishes
    (or the generator is closed or fails).
    """
    # Setup, this assumes for all the format names used
    # Biopython and EMBOSS names are consistent!
    cline = SeqretCommandline(
        exes["seqret"],
        sformat=old_format,
        osformat=new_format,
        auto=True,  # no prompting
        filter=True)
    # Run the tool; universal_newlines=True gives text mode pipes, which
    # SeqIO needs for writing and parsing text formats on Python 3.
    child = subprocess.Popen(str(cline),
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             universal_newlines=True,
                             shell=(sys.platform != "win32"))
    try:
        SeqIO.write(records, child.stdin, old_format)
    except Exception:
        # Do not leak the pipes if the input could not be written.
        # stdin goes last since flushing it may itself raise.
        child.stdout.close()
        child.stderr.close()
        child.stdin.close()
        raise
    # Closing stdin signals end of input to seqret.
    child.stdin.close()
    # stderr is never read here, so release it straight away.
    child.stderr.close()

    def _parse_and_close():
        """Yield the converted records, then close the output pipe."""
        try:
            for record in SeqIO.parse(child.stdout, new_format):
                yield record
        finally:
            child.stdout.close()

    return _parse_and_close()
예제 #5
0
def emboss_convert(filename, old_format, new_format):
    """Run seqret on a file and return a handle to the converted output.

    seqret is asked to read ``filename`` as ``old_format`` and to write
    ``new_format`` to its standard output. The returned handle is the
    read end of that pipe, opened in text mode; this function does not
    close it, so the caller should do so once finished reading.
    """
    # Setup, this assumes for all the format names used
    # Biopython and EMBOSS names are consistent!
    cline = SeqretCommandline(
        exes["seqret"],
        sequence=filename,
        sformat=old_format,
        osformat=new_format,
        auto=True,  # no prompting
        stdout=True)
    # Run the tool; universal_newlines=True makes the returned handle
    # yield text rather than bytes on Python 3.
    child = subprocess.Popen(str(cline),
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             universal_newlines=True,
                             shell=(sys.platform != "win32"))
    # Nothing is sent on stdin; the input is given by file name.
    child.stdin.close()
    # stderr is never read and is not handed back, so release it here.
    child.stderr.close()
    return child.stdout
예제 #6
0
def emboss_piped_AlignIO_convert(alignments, old_format, new_format):
    """Pipe alignments through EMBOSS seqret and return the converted ones.

    The alignments are written in ``old_format`` to seqret's standard
    input and the tool's standard output is parsed as ``new_format``.

    Returns a list of the parsed alignments. The output is loaded in full
    so that every pipe can be closed before this function returns. Any
    exception from writing or parsing propagates after the pipes have
    been closed.
    """
    # Setup, this assumes for all the format names used
    # Biopython and EMBOSS names are consistent!
    cline = SeqretCommandline(exes["seqret"],
                              sformat=old_format,
                              osformat=new_format,
                              auto=True,  # no prompting
                              filter=True)
    # Run the tool,
    child = subprocess.Popen(str(cline),
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             universal_newlines=True,
                             shell=(sys.platform != "win32"))
    try:
        AlignIO.write(alignments, child.stdin, old_format)
        # Closing stdin signals end of input to seqret.
        child.stdin.close()
        child.stderr.close()
        return list(AlignIO.parse(child.stdout, new_format))
    finally:
        # Runs on success and on failure; closing an already closed pipe
        # is harmless. stdin goes last since flushing it may itself raise.
        child.stdout.close()
        child.stderr.close()
        child.stdin.close()
예제 #7
0
 def test_abi(self):
     """Compare SeqIO's ABI parsing with seqret's ABI to FASTQ output."""
     # Going via FASTQ lets us check the id, sequence, and quality scores
     use_shell = sys.platform != "win32"
     for abi_file in ("Abi/3730.ab1", "Abi/empty.ab1"):
         expected = SeqIO.read(abi_file, "abi")
         command = str(
             SeqretCommandline(
                 exes["seqret"],
                 sequence=abi_file,
                 sformat="abi",
                 osformat="fastq-sanger",
                 auto=True,  # no prompting
                 stdout=True,
             )
         )
         with subprocess.Popen(
                 command,
                 stdin=subprocess.PIPE,
                 stdout=subprocess.PIPE,
                 stderr=subprocess.PIPE,
                 universal_newlines=True,
                 shell=use_shell,
         ) as proc:
             # Nothing is sent on stdin; the input is given by file name.
             proc.stdin.close()
             converted = SeqIO.read(proc.stdout, "fastq-sanger")

         # Avoid bug in EMBOSS 6.4.0 (patch forthcoming): the id check is
         # skipped when that version reports the id "EMBOSS_001".
         id_bug = emboss_version == (6, 4, 0) and converted.id == "EMBOSS_001"
         if not id_bug:
             self.assertEqual(expected.id, converted.id)

         self.assertEqual(str(expected.seq), str(converted.seq))

         # Apparent bug in EMBOSS 6.2.0.1 on Windows: the annotation check
         # is skipped when an older version gives every quality as 1.
         quality_bug = (
             emboss_version < (6, 3, 0)
             and converted.letter_annotations["phred_quality"]
             == [1] * len(expected)
         )
         if not quality_bug:
             self.assertEqual(expected.letter_annotations,
                              converted.letter_annotations)