def check_EMBOSS_to_AlignIO(self, filename, old_format, skip_formats=()):
    """Verify that AlignIO parses seqret's conversions of ``filename``.

    The file is read directly with AlignIO as ``old_format``.  Then, for
    every candidate output format that is not in ``skip_formats``, seqret
    is run on the file, its standard output is parsed with AlignIO, and the
    result is handed to ``self.compare_alignments`` together with the
    directly-parsed alignments.
    """
    self.assertTrue(os.path.isfile(filename), filename)
    old_aligns = list(AlignIO.parse(filename, old_format))

    # "fasta" and "nexus" are only attempted when the input held exactly
    # one alignment.
    single_only = ["fasta", "nexus"] if len(old_aligns) == 1 else []
    targets = [
        fmt
        for fmt in ["clustal", "phylip", "ig", "msf"] + single_only
        if fmt not in skip_formats
    ]

    popen_options = {
        "stdin": subprocess.PIPE,
        "stdout": subprocess.PIPE,
        "stderr": subprocess.PIPE,
        "universal_newlines": True,
        "shell": (sys.platform != "win32"),
    }

    for new_format in targets:
        cline = SeqretCommandline(
            exes["seqret"],
            sequence=filename,
            sformat=old_format,
            osformat=new_format,
            auto=True,  # no prompting
            stdout=True,
        )
        # Run the tool; nothing is sent to it, so stdin is closed at once.
        with subprocess.Popen(str(cline), **popen_options) as child:
            child.stdin.close()
            new_aligns = list(AlignIO.parse(child.stdout, new_format))
        msg = "converting %s from %s to %s" % (filename, old_format, new_format)
        self.compare_alignments(old_aligns, new_aligns, msg)
def emboss_piped_SeqIO_convert(records, old_format, new_format):
    """Pipe records through seqret and return the converted records as a list.

    ``records`` are written in ``old_format`` to seqret's standard input;
    seqret's standard output is parsed with SeqIO as ``new_format``.

    The whole output is read into a list before returning, so that the
    pipes can be closed and the child process waited for here rather than
    being left to the caller.
    """
    # Setup, this assumes for all the format names used
    # Biopython and EMBOSS names are consistent!
    cline = SeqretCommandline(
        exes["seqret"],
        sformat=old_format,
        osformat=new_format,
        auto=True,  # no prompting
        filter=True,
    )
    # Run the tool.  Using Popen as a context manager guarantees that the
    # pipes are closed and the process is waited for, even if writing the
    # input or parsing the output raises.
    with subprocess.Popen(
        str(cline),
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
        shell=(sys.platform != "win32"),
    ) as child:
        SeqIO.write(records, child.stdin, old_format)
        # Closing stdin signals end of input to seqret.
        child.stdin.close()
        # stderr is never inspected here, so release that pipe right away.
        child.stderr.close()
        return list(SeqIO.parse(child.stdout, new_format))
def check_EMBOSS_to_SeqIO(self, filename, old_format, skip_formats=()):
    """Verify that SeqIO parses seqret's conversions of ``filename``.

    The file is read directly with SeqIO as ``old_format``.  Then, for
    every candidate output format that is not in ``skip_formats``, seqret
    is run on the file, its standard output is parsed with SeqIO, and the
    result is handed to ``self.compare_records`` together with the
    directly-parsed records.
    """
    # TODO: Why can't we read EMBOSS's swiss output?
    self.assertTrue(os.path.isfile(filename))
    old_records = list(SeqIO.parse(filename, old_format))

    targets = [
        fmt
        for fmt in ["genbank", "fasta", "pir", "embl", "ig"]
        if fmt not in skip_formats
    ]

    popen_options = {
        "stdin": subprocess.PIPE,
        "stdout": subprocess.PIPE,
        "stderr": subprocess.PIPE,
        "universal_newlines": True,
        "shell": (sys.platform != "win32"),
    }

    for new_format in targets:
        cline = SeqretCommandline(
            exes["seqret"],
            sequence=filename,
            sformat=old_format,
            osformat=new_format,
            auto=True,  # no prompting
            stdout=True,
        )
        # Run the tool; nothing is sent to it, so stdin is closed at once.
        with subprocess.Popen(str(cline), **popen_options) as child:
            child.stdin.close()
            new_records = list(SeqIO.parse(child.stdout, new_format))
        msg = "converting %s from %s to %s" % (filename, old_format, new_format)
        self.compare_records(old_records, new_records, msg)
def emboss_piped_SeqIO_convert(records, old_format, new_format):
    """Pipe records through seqret; return an iterator over the result.

    ``records`` are written in ``old_format`` to seqret's standard input.
    The return value is whatever ``SeqIO.parse`` gives for seqret's
    standard output read as ``new_format``, so the output is consumed
    lazily by the caller.

    NOTE(review): nothing in this function closes ``child.stdout`` or
    ``child.stderr`` or waits for the process; that appears to be left to
    garbage collection -- confirm this is acceptable for the callers.
    """
    # Setup, this assumes for all the format names used
    # Biopython and EMBOSS names are consistent!
    cline = SeqretCommandline(
        exes["seqret"],
        sformat=old_format,
        osformat=new_format,
        auto=True,  # no prompting
        filter=True,
    )
    # Run the tool.  universal_newlines=True opens the pipes in text mode;
    # without it Python 3 gives binary pipes, and writing text records to
    # child.stdin fails.
    child = subprocess.Popen(
        str(cline),
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
        shell=(sys.platform != "win32"),
    )
    SeqIO.write(records, child.stdin, old_format)
    # Closing stdin signals end of input to seqret.
    child.stdin.close()
    return SeqIO.parse(child.stdout, new_format)
def emboss_convert(filename, old_format, new_format):
    """Run seqret on ``filename`` and return its standard-output handle.

    seqret is asked to read ``filename`` as ``old_format`` and write
    ``new_format`` to standard output.  The returned object is the
    ``stdout`` pipe of the child process, opened in text mode.

    NOTE(review): the ``stderr`` pipe is never read or closed here, and the
    process is not waited for; closing the returned handle is presumably
    the caller's job -- confirm.
    """
    # Setup, this assumes for all the format names used
    # Biopython and EMBOSS names are consistent!
    cline = SeqretCommandline(
        exes["seqret"],
        sequence=filename,
        sformat=old_format,
        osformat=new_format,
        auto=True,  # no prompting
        stdout=True,
    )
    # Run the tool.  universal_newlines=True opens the pipes in text mode;
    # without it Python 3 would return a handle that yields bytes.
    child = subprocess.Popen(
        str(cline),
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
        shell=(sys.platform != "win32"),
    )
    # Nothing is sent to seqret, so close its stdin straight away.
    child.stdin.close()
    return child.stdout
def emboss_piped_AlignIO_convert(alignments, old_format, new_format):
    """Start seqret and write ``alignments`` to its standard input.

    ``alignments`` are written in ``old_format``; seqret is asked to emit
    ``new_format``.  If writing fails, all three pipes are closed and the
    exception is re-raised unchanged.

    NOTE(review): as visible here, nothing is read from ``child.stdout``
    and nothing is returned on success, although the function name suggests
    converted alignments should come back -- confirm whether the remainder
    of this function is missing.
    """
    # Setup, this assumes for all the format names used
    # Biopython and EMBOSS names are consistent!
    cline = SeqretCommandline(
        exes["seqret"],
        sformat=old_format,
        osformat=new_format,
        auto=True,  # no prompting
        filter=True,
    )
    # Run the tool,
    child = subprocess.Popen(
        str(cline),
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
        shell=(sys.platform != "win32"),
    )
    try:
        AlignIO.write(alignments, child.stdin, old_format)
    except Exception:
        # "except Exception, err" is a syntax error on Python 3 and the
        # bound name was unused; this form works on Python 2 and 3.
        # Release the pipes before propagating the original error.
        child.stdin.close()
        child.stderr.close()
        child.stdout.close()
        raise
def test_abi(self):
    """Compare SeqIO's ABI parsing with EMBOSS seqret's ABI-to-FASTQ output."""
    # Going via FASTQ lets us check the id, sequence, and quality scores
    popen_options = {
        "stdin": subprocess.PIPE,
        "stdout": subprocess.PIPE,
        "stderr": subprocess.PIPE,
        "universal_newlines": True,
        "shell": (sys.platform != "win32"),
    }
    for filename in ("Abi/3730.ab1", "Abi/empty.ab1"):
        old = SeqIO.read(filename, "abi")
        cline = SeqretCommandline(
            exes["seqret"],
            sequence=filename,
            sformat="abi",
            osformat="fastq-sanger",
            auto=True,  # no prompting
            stdout=True,
        )
        # Run the tool; nothing is sent to it, so stdin is closed at once.
        with subprocess.Popen(str(cline), **popen_options) as child:
            child.stdin.close()
            new = SeqIO.read(child.stdout, "fastq-sanger")

        # Avoid bug in EMBOSS 6.4.0 (patch forthcoming): skip the id check
        # when that version reports the id "EMBOSS_001".
        id_bug = emboss_version == (6, 4, 0) and new.id == "EMBOSS_001"
        if not id_bug:
            self.assertEqual(old.id, new.id)

        self.assertEqual(str(old.seq), str(new.seq))

        # Apparent bug in EMBOSS 6.2.0.1 on Windows: skip the annotation
        # check when an old version reports every quality as 1.
        quality_bug = emboss_version < (6, 3, 0) and new.letter_annotations[
            "phred_quality"
        ] == [1] * len(old)
        if not quality_bug:
            self.assertEqual(old.letter_annotations, new.letter_annotations)