def test_phenotype_IO(self):
    """Test basic functionalities of phenotype IO methods.

    Reads one plate from SMALL_JSON_PLATE and one from SMALL_CSV_PLATES,
    writes both to an in-memory handle as 'pm-json', parses that output
    back and compares the first record with the original. Finally checks
    that ``phenotype.write`` raises TypeError for the format ``1`` and
    ValueError for the formats 'PM-JSON' and 'pm-csv'.
    """
    p1 = phenotype.read(SMALL_JSON_PLATE, 'pm-json')
    p2 = next(phenotype.parse(SMALL_CSV_PLATES, 'pm-csv'))

    handle = StringIO()
    c = phenotype.write([p1, p2], handle, 'pm-json')
    self.assertEqual(c, 2)

    handle.flush()
    handle.seek(0)
    # Now ready to read back from the handle...
    try:
        records = list(phenotype.parse(handle, 'pm-json'))
    except ValueError as e:
        # This is BAD. We can't read our own output.
        # I want to see the output when called from the test harness,
        # run_tests.py (which can be funny about new lines on Windows)
        handle.seek(0)
        # ``records`` is never bound when parsing fails, so referencing it
        # here would raise UnboundLocalError and hide the real problem.
        # Report only the parser error and the raw output.
        raise ValueError("%s\n\n%s" % (str(e), repr(handle.read())))

    self.assertEqual(p1, records[0])
    handle.close()

    handle = StringIO()
    self.assertRaises(TypeError, phenotype.write, p1, handle, 1)
    self.assertRaises(ValueError, phenotype.write, p1, handle, 'PM-JSON')
    self.assertRaises(ValueError, phenotype.write, p1, handle, 'pm-csv')
    handle.close()
def test_phenotype_IO(self):
    """Test basic functionalities of phenotype IO methods.

    Round-trips two plates (one read from SMALL_JSON_PLATE, one parsed
    from SMALL_CSV_PLATES) through ``phenotype.write`` / ``phenotype.parse``
    using the 'pm-json' format, then checks the exceptions raised by
    ``phenotype.write`` for the format arguments ``1``, 'PM-JSON' and
    'pm-csv'.
    """
    p1 = phenotype.read(SMALL_JSON_PLATE, 'pm-json')
    p2 = next(phenotype.parse(SMALL_CSV_PLATES, 'pm-csv'))

    handle = StringIO()
    c = phenotype.write([p1, p2], handle, 'pm-json')
    self.assertEqual(c, 2)

    handle.flush()
    handle.seek(0)
    # Now ready to read back from the handle...
    try:
        records = list(phenotype.parse(handle, 'pm-json'))
    except ValueError as e:
        # This is BAD. We can't read our own output.
        # I want to see the output when called from the test harness,
        # run_tests.py (which can be funny about new lines on Windows)
        handle.seek(0)
        # Do not mention ``records`` here: the assignment above did not
        # complete, so the name is unbound inside this handler.
        raise ValueError("%s\n\n%s" % (str(e), repr(handle.read())))

    self.assertEqual(p1, records[0])
    handle.close()

    handle = StringIO()
    self.assertRaises(TypeError, phenotype.write, p1, handle, 1)
    self.assertRaises(ValueError, phenotype.write, p1, handle, 'PM-JSON')
    self.assertRaises(ValueError, phenotype.write, p1, handle, 'pm-csv')
    handle.close()
def __str__(self):
    """Return the MarkovModel as text, exactly as ``save`` writes it."""
    from Bio._py3k import StringIO

    # Let save() write into an in-memory buffer and hand back its contents.
    buffer = StringIO()
    save(self, buffer)
    return buffer.getvalue()
def __str__(self):
    """Build the string form of the MarkovModel object via ``save``."""
    from Bio._py3k import StringIO

    text_sink = StringIO()
    save(self, text_sink)
    # getvalue() yields everything written, independent of the position.
    return text_sink.getvalue()
def test_write_alignment(self):
    """Check when NexusWriter.write_alignment interleaves its output."""
    unit = "ATGCTGCTGA"

    def nexus_text(repeats, **options):
        # Write a three-record alignment of ``unit * repeats`` and return
        # the text NexusWriter produced for it.
        seq_records = []
        for name in ["foo", "bar", "baz"]:
            sequence = Seq(unit * repeats, alphabet=ambiguous_dna)
            seq_records.append(SeqRecord(sequence, id=name))
        alignment = MultipleSeqAlignment(seq_records, alphabet=ambiguous_dna)
        buffer = StringIO()
        NexusWriter(buffer).write_alignment(alignment, **options)
        buffer.seek(0)
        return buffer.read()

    # Default causes no interleave (columns <= 1000)
    data = nexus_text(90)
    self.assertIn(unit * 90, data)

    # Default causes interleave (columns > 1000)
    data = nexus_text(110)
    self.assertNotIn(unit * 90, data)
    self.assertIn(unit * 7, data)

    # Override interleave: True
    data = nexus_text(9, interleave=True)
    self.assertNotIn(unit * 9, data)
    self.assertIn(unit * 7, data)

    # Override interleave: False
    data = nexus_text(110, interleave=False)
    self.assertIn(unit * 110, data)
def test_write_alignment(self):
    """Exercise NexusWriter.write_alignment with and without interleaving."""
    motif = "ATGCTGCTGA"
    names = ["foo", "bar", "baz"]

    def written(copies, **writer_kwargs):
        # Return the NEXUS text for three identical sequences made of
        # ``copies`` repetitions of the motif.
        rows = [SeqRecord(Seq(motif * copies, alphabet=ambiguous_dna), id=name)
                for name in names]
        msa = MultipleSeqAlignment(rows, alphabet=ambiguous_dna)
        out = StringIO()
        NexusWriter(out).write_alignment(msa, **writer_kwargs)
        out.seek(0)
        return out.read()

    # Default causes no interleave (columns <= 1000)
    self.assertIn(motif * 90, written(90))

    # Default causes interleave (columns > 1000)
    interleaved_by_default = written(110)
    self.assertNotIn(motif * 90, interleaved_by_default)
    self.assertIn(motif * 7, interleaved_by_default)

    # Override interleave: True
    forced_interleave = written(9, interleave=True)
    self.assertNotIn(motif * 9, forced_interleave)
    self.assertIn(motif * 7, forced_interleave)

    # Override interleave: False
    self.assertIn(motif * 110, written(110, interleave=False))
def test_multiple_output(self):
    """Write one alignment as NEXUS; writing two must raise ValueError."""
    sequences = [
        ("foo", "ATGCTGCTGAT"),
        ("bar", "ATGCTGCAGAT"),
        ("baz", "ATGCTGCGGAT"),
    ]
    records = [SeqRecord(Seq(letters, alphabet=ambiguous_dna), id=name)
               for name, letters in sequences]
    a = MultipleSeqAlignment(records, alphabet=ambiguous_dna)

    # A single alignment is written as one NEXUS data block.
    single = StringIO()
    NexusWriter(single).write_file([a])
    data = single.getvalue()
    self.assertTrue(data.startswith("#NEXUS\nbegin data;\n"), data)
    self.assertTrue(data.endswith("end;\n"), data)

    # More than one alignment is expected to be rejected.
    double = StringIO()
    with self.assertRaises(ValueError):
        NexusWriter(double).write_file([a, a])
def test_multiple_output(self):
    """Write one alignment as NEXUS; writing two must raise ValueError.

    Checks the start and end of the text produced by
    ``NexusWriter.write_file`` for a single three-record alignment, then
    checks that passing the same alignment twice raises ValueError.
    """
    records = [SeqRecord(Seq("ATGCTGCTGAT", alphabet=ambiguous_dna), id="foo"),
               SeqRecord(Seq("ATGCTGCAGAT", alphabet=ambiguous_dna), id="bar"),
               SeqRecord(Seq("ATGCTGCGGAT", alphabet=ambiguous_dna), id="baz")]
    a = MultipleSeqAlignment(records, alphabet=ambiguous_dna)

    handle = StringIO()
    NexusWriter(handle).write_file([a])
    handle.seek(0)
    data = handle.read()
    self.assertTrue(data.startswith("#NEXUS\nbegin data;\n"), data)
    self.assertTrue(data.endswith("end;\n"), data)

    handle = StringIO()
    # Should reject more than one alignment. assertRaises is used rather
    # than ``assert False`` so the check still runs under ``python -O``.
    with self.assertRaises(ValueError):
        NexusWriter(handle).write_file([a, a])
def test_empty_file(self):
    """Check an empty handle yields no alignments, then exercise writing.

    Parses an empty StringIO with NexusIterator and expects zero
    alignments, prints the NEXUS text written for ``a``, and checks that
    writing two alignments raises ValueError.
    """
    print("Reading an empty file")
    # unittest assertion instead of a bare assert, which ``python -O`` strips.
    self.assertEqual(0, len(list(NexusIterator(StringIO()))))
    print("Done")
    print("")
    print("Writing...")

    handle = StringIO()
    # NOTE(review): ``a`` is not assigned anywhere in this function; it
    # presumably resolves to a module-level alignment - confirm.
    NexusWriter(handle).write_file([a])
    handle.seek(0)
    print(handle.read())

    handle = StringIO()
    # Should reject more than one alignment.
    with self.assertRaises(ValueError):
        NexusWriter(handle).write_file([a, a])
def test_multiple_output(self):
    """Check NexusWriter.write_file output for one alignment and for two.

    One alignment must produce text that starts with the NEXUS header and
    ends with ``end;``; a list of two alignments must raise ValueError.
    """
    records = [SeqRecord(Seq("ATGCTGCTGAT", alphabet=ambiguous_dna), id="foo"),
               SeqRecord(Seq("ATGCTGCAGAT", alphabet=ambiguous_dna), id="bar"),
               SeqRecord(Seq("ATGCTGCGGAT", alphabet=ambiguous_dna), id="baz")]
    a = MultipleSeqAlignment(records, alphabet=ambiguous_dna)

    handle = StringIO()
    NexusWriter(handle).write_file([a])
    handle.seek(0)
    data = handle.read()
    self.assertTrue(data.startswith("#NEXUS\nbegin data;\n"), data)
    self.assertTrue(data.endswith("end;\n"), data)

    handle = StringIO()
    # The previous ``try / assert False / except`` form is removed by
    # ``python -O``; assertRaises always enforces the rejection.
    with self.assertRaises(ValueError):
        NexusWriter(handle).write_file([a, a])
def test_empty_file(self):
    """Read an empty NEXUS handle, then write ``a`` once and twice.

    Expects NexusIterator to yield nothing for an empty StringIO, prints
    what NexusWriter writes for ``[a]``, and expects ValueError for
    ``[a, a]``.
    """
    print("Reading an empty file")
    # Use a unittest assertion so the check survives ``python -O``.
    self.assertEqual(0, len(list(NexusIterator(StringIO()))))
    print("Done")
    print("")
    print("Writing...")

    handle = StringIO()
    # NOTE(review): ``a`` is not defined in this function; presumably a
    # module-level alignment is expected to exist - verify.
    NexusWriter(handle).write_file([a])
    handle.seek(0)
    print(handle.read())

    handle = StringIO()
    # Should reject more than one alignment.
    with self.assertRaises(ValueError):
        NexusWriter(handle).write_file([a, a])
def _parse_own_output(handle, fmt, **parse_options):
    """Rewind ``handle`` and parse it with AlignIO in format ``fmt``.

    Returns the parsed alignments as a list. If parsing raises ValueError,
    a new ValueError is raised carrying the original message followed by
    the repr of everything in the handle.
    """
    handle.flush()
    handle.seek(0)
    try:
        return list(AlignIO.parse(handle=handle, format=fmt, **parse_options))
    except ValueError as e:
        # This is BAD. We can't read our own output.
        # I want to see the output when called from the test harness,
        # run_tests.py (which can be funny about new lines on Windows)
        handle.seek(0)
        # The parsed list does not exist when parsing fails, so only the
        # error text and the raw output are reported.
        raise ValueError("%s\n\n%s" % (str(e), repr(handle.read())))


def check_simple_write_read(alignments, indent=" "):
    """Write the alignments in each format and check they read back.

    For every format in ``test_write_read_align_with_seq_count`` the
    alignments are written to an in-memory handle with ``AlignIO.write``.
    The output is parsed back with ``seq_count`` when all alignments hold
    the same (non-zero) number of records, and without it when the format
    is also in ``test_write_read_alignment_formats``; each result is
    compared using ``simple_alignment_comparison``.

    Arguments:
     - alignments - list of alignments to write.
     - indent - prefix for the progress messages that are printed.

    Raises ValueError if output that was written cannot be parsed back.
    A ValueError from writing is printed and that format is skipped.
    """
    # print(indent+"Checking we can write and then read back these alignments")
    for fmt in test_write_read_align_with_seq_count:
        records_per_alignment = len(alignments[0])
        for a in alignments:
            if records_per_alignment != len(a):
                records_per_alignment = None
        # Can we expect this format to work?
        if not records_per_alignment \
                and fmt not in test_write_read_alignment_formats:
            continue

        print(indent + "Checking can write/read as '%s' format" % fmt)

        # Going to write to a handle...
        handle = StringIO()

        try:
            c = AlignIO.write(alignments, handle=handle, format=fmt)
            assert c == len(alignments)
        except ValueError as e:
            # This is often expected to happen, for example when we try and
            # write sequences of different lengths to an alignment file.
            print(indent + "Failed: %s" % str(e))
            # Carry on to the next format:
            continue

        # First, try with the seq_count
        if records_per_alignment:
            alignments2 = _parse_own_output(handle, fmt,
                                            seq_count=records_per_alignment)
            simple_alignment_comparison(alignments, alignments2, fmt)

        if fmt in test_write_read_alignment_formats:
            # Don't need the seq_count
            alignments2 = _parse_own_output(handle, fmt)
            simple_alignment_comparison(alignments, alignments2, fmt)

        if len(alignments) > 1:
            # Try writing just one Alignment (not a list)
            handle = StringIO()
            AlignIO.write(alignments[0:1], handle, fmt)
            assert handle.getvalue() == alignments[0].format(fmt)
if t_format != "nexus": # Hack for bug 2535 pssm = summary.pos_specific_score_matrix() rep_dict = summary.replacement_dictionary() try: info_content = summary.information_content() except ValueError as err: if str( err ) != "Error in alphabet: not Nucleotide or Protein, supply expected frequencies": raise err if t_count == 1 and t_format not in ["nexus", "emboss", "fasta-m10"]: # print(" Trying to read a triple concatenation of the input file") with open(t_filename, "r") as handle: data = handle.read() handle = StringIO() handle.write(data + "\n\n" + data + "\n\n" + data) handle.seek(0) assert len( list(AlignIO.parse(handle=handle, format=t_format, seq_count=t_per))) == 3 handle.close() # Some alignment file formats have magic characters which mean # use the letter in this position in the first sequence. # They should all have been converted by the parser, but if # not reversing the record order might expose an error. Maybe. alignments.reverse() check_simple_write_read(alignments)
def check_simple_write_read(alignments, indent=" "):
    """Write the alignments in each format and check they read back.

    Loops over ``test_write_read_align_with_seq_count``. For each format
    the alignments are written to an in-memory handle, parsed back (with
    ``seq_count`` when every alignment has the same non-zero number of
    records, and without it for formats that are also listed in
    ``test_write_read_alignment_formats``) and compared with
    ``simple_alignment_comparison``.

    Arguments:
     - alignments - list of alignments to write.
     - indent - prefix for the progress messages that are printed.

    Raises ValueError if output that was written cannot be parsed back.
    A ValueError from writing is printed and that format is skipped.
    """
    # print(indent+"Checking we can write and then read back these alignments")
    for fmt in test_write_read_align_with_seq_count:
        records_per_alignment = len(alignments[0])
        for a in alignments:
            if records_per_alignment != len(a):
                records_per_alignment = None
        # Can we expect this format to work?
        if not records_per_alignment \
                and fmt not in test_write_read_alignment_formats:
            continue

        print(indent + "Checking can write/read as '%s' format" % fmt)

        # Going to write to a handle...
        handle = StringIO()

        try:
            c = AlignIO.write(alignments, handle=handle, format=fmt)
            assert c == len(alignments)
        except ValueError as e:
            # This is often expected to happen, for example when we try and
            # write sequences of different lengths to an alignment file.
            print(indent + "Failed: %s" % str(e))
            # Carry on to the next format:
            continue

        # First, try with the seq_count
        if records_per_alignment:
            handle.flush()
            handle.seek(0)
            try:
                alignments2 = list(AlignIO.parse(handle=handle, format=fmt,
                                                 seq_count=records_per_alignment))
            except ValueError as e:
                # This is BAD. We can't read our own output.
                # I want to see the output when called from the test harness,
                # run_tests.py (which can be funny about new lines on Windows)
                handle.seek(0)
                # ``alignments2`` is unbound (or left over from an earlier
                # pass) at this point, so it is kept out of the message.
                raise ValueError("%s\n\n%s" % (str(e), repr(handle.read())))
            simple_alignment_comparison(alignments, alignments2, fmt)

        if fmt in test_write_read_alignment_formats:
            # Don't need the seq_count
            handle.flush()
            handle.seek(0)
            try:
                alignments2 = list(AlignIO.parse(handle=handle, format=fmt))
            except ValueError as e:
                # This is BAD. We can't read our own output.
                # I want to see the output when called from the test harness,
                # run_tests.py (which can be funny about new lines on Windows)
                handle.seek(0)
                # See above: only the error and the raw output are reliable.
                raise ValueError("%s\n\n%s" % (str(e), repr(handle.read())))
            simple_alignment_comparison(alignments, alignments2, fmt)

        if len(alignments) > 1:
            # Try writing just one Alignment (not a list)
            handle = StringIO()
            SeqIO.write(alignments[0], handle, fmt)
            assert handle.getvalue() == alignments[0].format(fmt)
# gap_consensus = summary.gap_consensus() if t_format != "nexus": # Hack for bug 2535 pssm = summary.pos_specific_score_matrix() rep_dict = summary.replacement_dictionary() try: info_content = summary.information_content() except ValueError as e: if str(e) != "Error in alphabet: not Nucleotide or Protein, supply expected frequencies": raise e pass if t_count == 1 and t_format not in ["nexus", "emboss", "fasta-m10"]: # print(" Trying to read a triple concatenation of the input file") with open(t_filename, "r") as handle: data = handle.read() handle = StringIO() handle.write(data + "\n\n" + data + "\n\n" + data) handle.seek(0) assert 3 == len(list(AlignIO.parse(handle=handle, format=t_format, seq_count=t_per))) handle.close() # Some alignment file formats have magic characters which mean # use the letter in this position in the first sequence. # They should all have been converted by the parser, but if # not reversing the record order might expose an error. Maybe. alignments.reverse() check_simple_write_read(alignments) print("Finished tested reading files")
CYS1_DICDI -----MKVIL LFVLAVFTVF VSS------- --------RG IPPEEQ---- ALEU_HORVU MAHARVLLLA LAVLATAAVA VASSSSFADS NPIRPVTDRA ASTLESAVLG CATH_HUMAN ------MWAT LPLLCAGAWL LGV------- -PVCGAAELS VNSLEK---- CYS1_DICDI -----MKVIL LFVLAVFTVF VSS------- --------RG IPPEEQ---X ; end; """) for a in NexusIterator(handle): print(a) for r in a: print("%r %s %s" % (r.seq, r.name, r.id)) print("Done") print("") print("Reading an empty file") assert 0 == len(list(NexusIterator(StringIO()))) print("Done") print("") print("Writing...") handle = StringIO() NexusWriter(handle).write_file([a]) handle.seek(0) print(handle.read()) handle = StringIO() try: NexusWriter(handle).write_file([a, a]) assert False, "Should have rejected more than one alignment!" except ValueError: pass
def __str__(self): from Bio._py3k import StringIO handle = StringIO() save(self, handle) handle.seek(0) return handle.read()
CYS1_DICDI -----MKVIL LFVLAVFTVF VSS------- --------RG IPPEEQ---- ALEU_HORVU MAHARVLLLA LAVLATAAVA VASSSSFADS NPIRPVTDRA ASTLESAVLG CATH_HUMAN ------MWAT LPLLCAGAWL LGV------- -PVCGAAELS VNSLEK---- CYS1_DICDI -----MKVIL LFVLAVFTVF VSS------- --------RG IPPEEQ---X ; end; """) for a in NexusIterator(handle): print(a) for r in a: print("%r %s %s" % (r.seq, r.name, r.id)) print("Done") print("") print("Reading an empty file") assert 0 == len(list(NexusIterator(StringIO()))) print("Done") print("") print("Writing...") handle = StringIO() NexusWriter(handle).write_file([a]) handle.seek(0) print(handle.read()) handle = StringIO() try: NexusWriter(handle).write_file([a, a]) assert False, "Should have rejected more than one alignment!" except ValueError: pass