def test_save(self):
    """Exercise basic trie operations, then round-trip the trie via save/load."""
    trieobj = trie.trie()
    trieobj["foo"] = 1
    # Materialise keys()/values() with list() so the comparison against a
    # list literal does not depend on the exact container type returned.
    k = list(trieobj.keys())
    self.assertEqual(k, ["foo"])
    v = list(trieobj.values())
    self.assertEqual(v, [1])
    self.assertEqual(trieobj.get("bar", 99), 99)

    trieobj["hello"] = "55a"

    self.assertEqual(trieobj.get_approximate("foo", 0), [("foo", 1, 0)])
    self.assertEqual(trieobj.get_approximate("foo", 1), [("foo", 1, 0)])
    self.assertEqual(trieobj.get_approximate("foa", 0), [])
    self.assertEqual(trieobj.get_approximate("foa", 1), [("foo", 1, 1)])
    x = sorted(trieobj.get_approximate("foa", 2))
    self.assertEqual(x, [("foo", 1, 1), ("foo", 1, 2), ("foo", 1, 2)])
    # foo  foo-  foo-
    # foa  f-oa  fo-a
    # mismatch a->o
    # insertion after f, deletion of o
    # insertion after o, deletion of o

    # Count how many times each (key, value, mismatches) hit is reported.
    x = trieobj.get_approximate("foo", 4)
    y = {}
    for z in x:
        y[z] = y.get(z, 0) + 1
    x = sorted(y.items())
    self.assertEqual(x, [(("foo", 1, 0), 1), (("hello", "55a", 4), 6)])

    # Save to an in-memory handle and load it back.
    h = StringIO()
    trie.save(h, trieobj)
    h.seek(0)
    trieobj = trie.load(h)
    k = list(trieobj.keys())
    # assertIn reports the container contents on failure.
    self.assertIn("foo", k)
    self.assertIn("hello", k)
    self.assertEqual(repr(trieobj["foo"]), "1")
    self.assertEqual(repr(trieobj["hello"]), "'55a'")
def test_write_species(self):
    """Test writing species from annotation tags."""
    record = SeqIO.read("SwissProt/sp016", "swiss")
    self.assertEqual(record.annotations["organism"], "Homo sapiens (Human)")
    self.assertEqual(record.annotations["ncbi_taxid"], ["9606"])
    handle = StringIO()
    SeqIO.write(record, handle, "seqxml")
    output = handle.getvalue()
    self.assertIn("Homo sapiens (Human)", output)
    self.assertIn("9606", output)
    # Any of these serialisations of the <species> element is acceptable.
    acceptable_tags = (
        # Good, but don't get this (do we?)
        '<species name="Homo sapiens (Human)" ncbiTaxID="9606"/>',
        # Not as concise, but fine (seen on C Python)
        '<species name="Homo sapiens (Human)" ncbiTaxID="9606"></species>',
        # Jython uses a different order
        '<species ncbiTaxID="9606" name="Homo sapiens (Human)"></species>',
        # This would be fine too, but don't get this (do we?)
        '<species ncbiTaxID="9606" name="Homo sapiens (Human)"/>',
    )
    if not any(tag in output for tag in acceptable_tags):
        # Report as a test failure rather than an unexpected error.
        self.fail("Missing expected <species> tag: %r" % output)
def test_phenotype_IO(self):
    """Test basic functionalities of phenotype IO methods."""
    p1 = phenotype.read(SMALL_JSON_PLATE, 'pm-json')
    p2 = next(phenotype.parse(SMALL_CSV_PLATES, 'pm-csv'))

    handle = StringIO()

    c = phenotype.write([p1, p2], handle, 'pm-json')
    self.assertEqual(c, 2)

    handle.flush()
    handle.seek(0)
    # Now ready to read back from the handle...
    try:
        records = list(phenotype.parse(handle, 'pm-json'))
    except ValueError as e:
        # This is BAD. We can't read our own output.
        # I want to see the output when called from the test harness,
        # run_tests.py (which can be funny about new lines on Windows)
        handle.seek(0)
        # ``records`` is not bound when parsing raised, so only the error
        # and the raw handle contents can be reported here.
        raise ValueError("%s\n\n%s" % (str(e), repr(handle.read())))

    self.assertEqual(p1, records[0])

    handle.close()
    handle = StringIO()
    # An invalid format argument (wrong type, wrong case, or a format that
    # cannot be written) must be rejected.
    self.assertRaises(TypeError, phenotype.write, p1, handle, 1)
    self.assertRaises(ValueError, phenotype.write, p1, handle, 'PM-JSON')
    self.assertRaises(ValueError, phenotype.write, p1, handle, 'pm-csv')
    handle.close()
def test_generated(self):
    """Write and read back odd SeqRecord objects."""
    record1 = SeqRecord(Seq("ACGT" * 500, generic_dna), id="Test",
                        description="Long " * 500,
                        letter_annotations={"phred_quality": [40, 30, 20, 10] * 500})
    record2 = SeqRecord(MutableSeq("NGGC" * 1000), id="Mut",
                        description="very " * 1000 + "long",
                        letter_annotations={"phred_quality": [0, 5, 5, 10] * 1000})
    record3 = SeqRecord(UnknownSeq(2000, character="N"), id="Unk",
                        description="l" + ("o" * 1000) + "ng",
                        letter_annotations={"phred_quality": [0, 1] * 1000})
    record4 = SeqRecord(Seq("ACGT" * 500), id="no_descr",
                        description="", name="",
                        letter_annotations={"phred_quality": [40, 50, 60, 62] * 500})
    record5 = SeqRecord(Seq("", generic_dna), id="empty_p",
                        description="(could have been trimmed lots)",
                        letter_annotations={"phred_quality": []})
    record6 = SeqRecord(Seq(""), id="empty_s",
                        description="(could have been trimmed lots)",
                        letter_annotations={"solexa_quality": []})
    record7 = SeqRecord(Seq("ACNN" * 500), id="Test_Sol",
                        description="Long " * 500,
                        letter_annotations={"solexa_quality": [40, 30, 0, -5] * 500})
    record8 = SeqRecord(Seq("ACGT"), id="HighQual",
                        description="With very large qualities that even Sanger FASTQ can't hold!",
                        letter_annotations={"solexa_quality": [0, 10, 100, 1000]})
    # TODO - Record with no identifier?
    records = [record1, record2, record3, record4,
               record5, record6, record7, record8]
    # catch_warnings() restores the filter list on exit, even if a
    # comparison below fails. (simplefilter() inserts at the front of
    # warnings.filters, so filters.pop() would drop an unrelated entry.)
    with warnings.catch_warnings():
        # TODO - Have a Biopython defined "DataLossWarning?"
        warnings.simplefilter('ignore', BiopythonWarning)
        # TODO - Include phd output?
        for fmt in ["fasta", "fastq", "fastq-solexa", "fastq-illumina", "qual"]:
            handle = StringIO()
            SeqIO.write(records, handle, fmt)
            handle.seek(0)
            compare_records(records,
                            list(SeqIO.parse(handle, fmt)),
                            truncation_expected(fmt))
def test_genbank_date_list(self):
    """Check how a list-valued date annotation comes back from GenBank output."""
    sequence_object = Seq("ATGC", generic_dna)
    # (date list stored on the record, date expected after reading back)
    cases = (
        (["24-DEC-2015"], "24-DEC-2015"),
        (["24-DEC-2015", "25-JAN-2016"], "01-JAN-1980"),
    )
    for date_list, expected_date in cases:
        record = SeqRecord(sequence_object,
                           id='123456789',
                           name='UnitTest',
                           description='Test case for date parsing')
        record.annotations["date"] = date_list
        handle = StringIO()
        SeqIO.write(record, handle, 'genbank')
        handle.seek(0)
        gb = SeqIO.read(handle, "gb")
        self.assertEqual(gb.annotations["date"], expected_date)
def __str__(self):
    """Return the text that ``save`` writes for this MarkovModel."""
    from Bio._py3k import StringIO

    buffer = StringIO()
    save(self, buffer)
    # The whole buffer content is the string representation.
    return buffer.getvalue()
def test_generated(self):
    """Round-trip a set of unusual SeqRecord objects through several formats."""
    records = [
        SeqRecord(Seq("ACGT" * 500, generic_dna), id="Test",
                  description="Long " * 500,
                  letter_annotations={"phred_quality": [40, 30, 20, 10] * 500}),
        SeqRecord(MutableSeq("NGGC" * 1000), id="Mut",
                  description="very " * 1000 + "long",
                  letter_annotations={"phred_quality": [0, 5, 5, 10] * 1000}),
        SeqRecord(UnknownSeq(2000, character="N"), id="Unk",
                  description="l" + ("o" * 1000) + "ng",
                  letter_annotations={"phred_quality": [0, 1] * 1000}),
        SeqRecord(Seq("ACGT" * 500), id="no_descr",
                  description="", name="",
                  letter_annotations={"phred_quality": [40, 50, 60, 62] * 500}),
        SeqRecord(Seq("", generic_dna), id="empty_p",
                  description="(could have been trimmed lots)",
                  letter_annotations={"phred_quality": []}),
        SeqRecord(Seq(""), id="empty_s",
                  description="(could have been trimmed lots)",
                  letter_annotations={"solexa_quality": []}),
        SeqRecord(Seq("ACNN" * 500), id="Test_Sol",
                  description="Long " * 500,
                  letter_annotations={"solexa_quality": [40, 30, 0, -5] * 500}),
        SeqRecord(Seq("ACGT"), id="HighQual",
                  description="With very large qualities that even Sanger FASTQ can't hold!",
                  letter_annotations={"solexa_quality": [0, 10, 100, 1000]}),
        # TODO - Record with no identifier?
    ]
    for fmt in ("fasta", "fastq", "fastq-solexa", "fastq-illumina", "qual"):
        out = StringIO()
        # Only the write step runs with BiopythonWarning silenced.
        with warnings.catch_warnings():
            # TODO - Have a Biopython defined "DataLossWarning?"
            warnings.simplefilter('ignore', BiopythonWarning)
            SeqIO.write(records, out, fmt)
        out.seek(0)
        reloaded = list(SeqIO.parse(out, fmt))
        compare_records(records, reloaded, truncation_expected(fmt))
def check_convert_fails(in_filename, in_format, out_format, alphabet=None):
    """Check parse/write and convert both fail with the same ValueError message.

    Raises AssertionError if either route succeeds, or if the two error
    messages differ.
    """
    qual_truncate = truncation_expected(out_format)
    # We want the SAME error message from parse/write as convert!
    err1 = None
    try:
        records = list(SeqIO.parse(in_filename, in_format, alphabet))
        handle = StringIO()
        # catch_warnings() restores the filters even though the write is
        # expected to raise; a trailing filters.pop() would be skipped.
        with warnings.catch_warnings():
            if qual_truncate:
                warnings.simplefilter('ignore', UserWarning)
            SeqIO.write(records, handle, out_format)
    except ValueError as err:
        err1 = err
    else:
        # Explicit raise (not ``assert``) so the check survives ``python -O``.
        raise AssertionError("Parse or write should have failed!")
    # Now do the conversion...
    try:
        handle2 = StringIO()
        with warnings.catch_warnings():
            if qual_truncate:
                warnings.simplefilter('ignore', UserWarning)
            SeqIO.convert(in_filename, in_format, handle2, out_format, alphabet)
    except ValueError as err2:
        if str(err1) != str(err2):
            raise AssertionError(
                "Different failures, parse/write:\n%s\nconvert:\n%s"
                % (err1, err2))
    else:
        raise AssertionError("Convert should have failed!")
def test_genbank_date_list(self):
    """Check the date read back when the date annotation is a list."""
    sequence_object = Seq("ATGC", generic_dna)

    def date_after_round_trip(dates):
        # Write a record carrying ``dates`` as GenBank text and return the
        # date annotation found when that text is parsed again.
        record = SeqRecord(sequence_object,
                           id='123456789',
                           name='UnitTest',
                           description='Test case for date parsing')
        record.annotations["date"] = dates
        handle = StringIO()
        SeqIO.write(record, handle, 'genbank')
        handle.seek(0)
        return SeqIO.read(handle, "gb").annotations["date"]

    # A one-element list comes back as that date.
    self.assertEqual(date_after_round_trip(["24-DEC-2015"]), "24-DEC-2015")
    # A two-element list comes back as 01-JAN-1980.
    self.assertEqual(date_after_round_trip(["24-DEC-2015", "25-JAN-2016"]),
                     "01-JAN-1980")
def test_000_write_invalid_but_parsed_locus_line(self):
    """Check a parseable but slightly invalid LOCUS line is written back unchanged."""
    # Start from a valid GenBank file.
    with open(path.join('GenBank', 'NC_005816.gb'), 'r') as source:
        lines = source.readlines()

    # Replace the LOCUS line with one whose molecule type is lower case.
    invalid_line = "LOCUS NC_005816 9609 bp dna circular BCT 21-JUL-2008\n"
    lines[0] = invalid_line
    fake_handle = StringIO("".join(lines))

    # Parsing must emit exactly one warning about the molecule type.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        rec = SeqIO.read(fake_handle, 'genbank')
        self.assertEqual(len(caught), 1)
        only_warning = caught[0]
        self.assertEqual(only_warning.category, BiopythonParserWarning)
        self.assertEqual(str(only_warning.message),
                         "Non-upper case molecule type in LOCUS line: dna")

    # Writing the record must reproduce the same first line.
    out_handle = StringIO()
    written = SeqIO.write([rec], out_handle, 'genbank')
    self.assertEqual(written, 1)

    out_handle.seek(0)
    first_line = out_handle.readlines()[0]
    self.assertEqual(first_line, invalid_line)
def test_write_species(self):
    """Test writing species from annotation tags."""
    record = SeqIO.read("SwissProt/sp016", "swiss")
    self.assertEqual(record.annotations["organism"], "Homo sapiens (Human)")
    self.assertEqual(record.annotations["ncbi_taxid"], ["9606"])
    handle = StringIO()
    SeqIO.write(record, handle, "seqxml")
    output = handle.getvalue()
    self.assertIn("Homo sapiens (Human)", output)
    self.assertIn("9606", output)
    # Attribute order and empty-element style may vary, so accept any of
    # the four equivalent spellings of the <species> element.
    candidates = [
        # Good, but don't get this (do we?)
        '<species name="Homo sapiens (Human)" ncbiTaxID="9606"/>',
        # Not as concise, but fine (seen on C Python)
        '<species name="Homo sapiens (Human)" ncbiTaxID="9606"></species>',
        # Jython uses a different order
        '<species ncbiTaxID="9606" name="Homo sapiens (Human)"></species>',
        # This would be fine too, but don't get this (do we?)
        '<species ncbiTaxID="9606" name="Homo sapiens (Human)"/>',
    ]
    for tag in candidates:
        if tag in output:
            break
    else:
        # No accepted form was found: report a test failure, not an error.
        self.fail("Missing expected <species> tag: %r" % output)
def test_000_write_invalid_but_parsed_locus_line(self):
    """Make sure a slightly invalid LOCUS line we can parse survives writing."""
    genbank_path = path.join('GenBank', 'NC_005816.gb')
    # Read a valid file, keeping every line.
    with open(genbank_path, 'r') as handle:
        lines = handle.readlines()

    # Lower-case molecule type ("dna") makes the LOCUS line invalid.
    invalid_line = "LOCUS NC_005816 9609 bp dna circular BCT 21-JUL-2008\n"
    lines[0] = invalid_line
    modified_text = "".join(lines)
    fake_handle = StringIO(modified_text)

    expected_message = "Non-upper case molecule type in LOCUS line: dna"
    # Record every warning so the parser warning can be inspected.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        rec = SeqIO.read(fake_handle, 'genbank')
        self.assertEqual(len(caught), 1)
        self.assertEqual(caught[0].category, BiopythonParserWarning)
        self.assertEqual(str(caught[0].message), expected_message)

    out_handle = StringIO()
    self.assertEqual(SeqIO.write([rec], out_handle, 'genbank'), 1)

    # The first written line must equal the invalid line we supplied.
    out_handle.seek(0)
    out_lines = out_handle.readlines()
    self.assertEqual(out_lines[0], invalid_line)
def check_convert_fails(in_filename, in_format, out_format, alphabet=None):
    """Check parse/write and convert both fail with the same ValueError message.

    Raises AssertionError if either route succeeds, or if the two error
    messages differ.
    """
    qual_truncate = truncation_expected(out_format)
    # We want the SAME error message from parse/write as convert!
    err1 = None
    try:
        records = list(SeqIO.parse(in_filename, in_format, alphabet))
        handle = StringIO()
        with warnings.catch_warnings():
            if qual_truncate:
                warnings.simplefilter("ignore", BiopythonWarning)
            SeqIO.write(records, handle, out_format)
    except ValueError as err:
        err1 = err
    else:
        # Raised outside the ``try`` body so that ``except ValueError``
        # cannot swallow the "unexpected success" signal.
        raise AssertionError("Parse or write should have failed!")
    # Now do the conversion...
    try:
        handle2 = StringIO()
        with warnings.catch_warnings():
            if qual_truncate:
                warnings.simplefilter("ignore", BiopythonWarning)
            SeqIO.convert(in_filename, in_format, handle2, out_format, alphabet)
    except ValueError as err2:
        # Explicit raise (not ``assert``) so the check survives ``python -O``.
        if str(err1) != str(err2):
            raise AssertionError(
                "Different failures, parse/write:\n%s\nconvert:\n%s"
                % (err1, err2))
    else:
        raise AssertionError("Convert should have failed!")
def test_save(self):
    """Check lookups and approximate matching, then save and reload the trie."""
    trieobj = trie.trie()
    trieobj["foo"] = 1
    self.assertEqual(list(trieobj.keys()), ["foo"])
    self.assertEqual(list(trieobj.values()), [1])
    self.assertEqual(trieobj.get("bar", 99), 99)

    trieobj["hello"] = '55a'

    # (query, allowed mismatches, expected hits) for the unsorted checks
    approximate_cases = (
        ("foo", 0, [("foo", 1, 0)]),
        ("foo", 1, [("foo", 1, 0)]),
        ("foa", 0, []),
        ("foa", 1, [("foo", 1, 1)]),
    )
    for query, mismatches, expected in approximate_cases:
        self.assertEqual(trieobj.get_approximate(query, mismatches), expected)

    hits = sorted(trieobj.get_approximate("foa", 2))
    self.assertEqual(hits, [("foo", 1, 1), ("foo", 1, 2), ("foo", 1, 2)])
    # foo  foo-  foo-
    # foa  f-oa  fo-a
    # mismatch a->o
    # insertion after f, deletion of o
    # insertion after o, deletion of o

    # Tally how often each hit is returned.
    tally = {}
    for hit in trieobj.get_approximate("foo", 4):
        tally[hit] = tally.get(hit, 0) + 1
    self.assertEqual(sorted(tally.items()),
                     [(('foo', 1, 0), 1), (('hello', '55a', 4), 6)])

    # Save into an in-memory handle, then load from it.
    buf = StringIO()
    trie.save(buf, trieobj)
    buf.seek(0)
    trieobj = trie.load(buf)
    loaded_keys = list(trieobj.keys())
    self.assertTrue("foo" in loaded_keys)
    self.assertTrue("hello" in loaded_keys)
    self.assertEqual(repr(trieobj["foo"]), '1')
    self.assertEqual(repr(trieobj["hello"]), "'55a'")
def loop(self, filename, format):
    """Load a file into a new BioSQL namespace and round-trip it via GenBank.

    The records are parsed from ``filename``, loaded into the database,
    looked up again, written as GenBank to an in-memory handle and parsed
    once more; each stage is compared with the original records.
    """
    # Close the input file deterministically instead of leaking it.
    # NOTE(review): the "U" mode flag was removed from open() in
    # Python 3.11 - confirm which interpreters this must run on.
    with open(filename, "rU") as in_handle:
        original_records = list(SeqIO.parse(in_handle, format))
    # now open a connection to load the database
    server = BioSeqDatabase.open_database(driver=DBDRIVER,
                                          user=DBUSER, passwd=DBPASSWD,
                                          host=DBHOST, db=TESTDB)
    try:
        db_name = "test_loop_%s" % filename  # new namespace!
        db = server.new_database(db_name)
        count = db.load(original_records)
        self.assertEqual(count, len(original_records))
        server.commit()
        # Now read them back...
        biosql_records = [db.lookup(name=rec.name)
                          for rec in original_records]
        # And check they agree
        self.assertTrue(compare_records(original_records, biosql_records))
        # Now write to a handle...
        handle = StringIO()
        SeqIO.write(biosql_records, handle, "gb")
        # Now read them back...
        handle.seek(0)
        new_records = list(SeqIO.parse(handle, "gb"))
        handle.close()
        # And check they still agree
        self.assertEqual(len(new_records), len(original_records))
        for old, new in zip(original_records, new_records):
            # TODO - remove this hack because we don't yet write these (yet):
            for key in ["comment", "references", "db_source"]:
                if key in old.annotations and key not in new.annotations:
                    del old.annotations[key]
            self.assertTrue(compare_record(old, new))
    finally:
        # Done - release the connection even when an assertion fails.
        server.close()
def __str__(self):
    """Return this MarkovModel as text, as produced by ``save``."""
    from Bio._py3k import StringIO

    text_buffer = StringIO()
    save(self, text_buffer)
    # Rewind, then hand back everything that was written.
    text_buffer.seek(0)
    contents = text_buffer.read()
    return contents
def test_save_and_load(self):
    """Save a MarkovModel to a handle and check the loaded copy matches it."""
    states = "NR"
    alphabet = "AGTC"
    p_initial = array([1.0, 0.0])
    p_transition = array([[0.75, 0.25],
                          [0.25, 0.75]])
    p_emission = array([[0.45, 0.36, 0.06, 0.13],
                        [0.24, 0.18, 0.12, 0.46]])
    markov_model_save = MarkovModel.MarkovModel(
        states, alphabet, p_initial, p_transition, p_emission)

    # Round trip through an in-memory handle.
    handle = StringIO()
    MarkovModel.save(markov_model_save, handle)
    handle.seek(0)
    markov_model_load = MarkovModel.load(handle)

    # States and alphabet are compared after joining them into strings.
    self.assertEqual(''.join(markov_model_load.states), states)
    self.assertEqual(''.join(markov_model_load.alphabet), alphabet)
    # The probability arrays must match element for element.
    expected_arrays = (
        ("p_initial", p_initial),
        ("p_transition", p_transition),
        ("p_emission", p_emission),
    )
    for attribute, expected in expected_arrays:
        loaded = getattr(markov_model_load, attribute)
        self.assertTrue(array_equal(loaded, expected))
def test_phenotype_IO(self):
    """Test basic functionalities of phenotype IO methods."""
    p1 = phenotype.read(SMALL_JSON_PLATE, "pm-json")
    p2 = next(phenotype.parse(SMALL_CSV_PLATES, "pm-csv"))

    handle = StringIO()

    c = phenotype.write([p1, p2], handle, "pm-json")
    self.assertEqual(c, 2)

    handle.flush()
    handle.seek(0)
    # Now ready to read back from the handle...
    try:
        records = list(phenotype.parse(handle, "pm-json"))
    except ValueError as e:
        # This is BAD. We can't read our own output.
        # I want to see the output when called from the test harness,
        # run_tests.py (which can be funny about new lines on Windows)
        handle.seek(0)
        # ``records`` was never assigned if we get here, so referencing it
        # would raise a NameError that hides the parsing error.
        raise ValueError("%s\n\n%s" % (str(e), repr(handle.read())))

    self.assertEqual(p1, records[0])

    handle.close()
    handle = StringIO()
    # Writing with a non-string, wrongly-cased or unsupported format
    # argument must raise.
    self.assertRaises(TypeError, phenotype.write, p1, handle, 1)
    self.assertRaises(ValueError, phenotype.write, p1, handle, "PM-JSON")
    self.assertRaises(ValueError, phenotype.write, p1, handle, "pm-csv")
    handle.close()
def loop(self, filename, format):
    """Load a file into a new BioSQL namespace and round-trip it via GenBank."""
    original_records = list(SeqIO.parse(filename, format))

    # Connect to the test database and create a namespace for this file.
    server = BioSeqDatabase.open_database(driver=DBDRIVER,
                                          user=DBUSER,
                                          passwd=DBPASSWD,
                                          host=DBHOST,
                                          db=TESTDB)
    db = server.new_database("test_loop_%s" % filename)
    loaded = db.load(original_records)
    self.assertEqual(loaded, len(original_records))
    server.commit()

    # Fetch each record back by name and compare with the originals.
    biosql_records = []
    for rec in original_records:
        biosql_records.append(db.lookup(name=rec.name))
    self.assertTrue(compare_records(original_records, biosql_records))

    # Write the database records as GenBank and parse that output again.
    handle = StringIO()
    SeqIO.write(biosql_records, handle, "gb")
    handle.seek(0)
    new_records = list(SeqIO.parse(handle, "gb"))
    self.assertEqual(len(new_records), len(original_records))

    for old, new in zip(original_records, new_records):
        # TODO - remove this hack because we don't yet write these (yet):
        unwritten = [key for key in ("comment", "references", "db_source")
                     if key in old.annotations and key not in new.annotations]
        for key in unwritten:
            del old.annotations[key]
        self.assertTrue(compare_record(old, new))

    # Done
    handle.close()
    server.close()
def test_write_read(self):
    """Write three alignments and compare lengths after reading them back."""
    from_example1 = list(ClustalIterator(StringIO(aln_example1)))
    from_example2 = list(ClustalIterator(StringIO(aln_example2)))
    # The alignments parsed from the second example are used twice.
    alignments = from_example1 + from_example2 * 2

    handle = StringIO()
    written = ClustalWriter(handle).write_file(alignments)
    self.assertEqual(3, written)

    handle.seek(0)
    for index, reread in enumerate(ClustalIterator(handle)):
        expected_length = alignments[index].get_alignment_length()
        self.assertEqual(reread.get_alignment_length(), expected_length)
def test_write_read(self):
    """Check alignment lengths are preserved by a Clustal write/read cycle."""
    alignments = list(ClustalIterator(StringIO(aln_example1)))
    # Append the alignments from the second example twice over.
    alignments = alignments + list(ClustalIterator(StringIO(aln_example2))) * 2

    out = StringIO()
    writer = ClustalWriter(out)
    self.assertEqual(3, writer.write_file(alignments))

    out.seek(0)
    position = 0
    for parsed in ClustalIterator(out):
        self.assertEqual(parsed.get_alignment_length(),
                         alignments[position].get_alignment_length())
        position += 1
def read_longer_than_maxsize():
    """Parse a GenBank record whose LOCUS line declares sys.maxsize + 1 bp.

    The first line of GenBank/DS830848.gb is replaced with a LOCUS line
    carrying that length and the result is handed to SeqIO.read.
    NOTE(review): presumably the caller expects this to raise - confirm
    against the enclosing test.
    """
    with open(path.join("GenBank", "DS830848.gb"), 'r') as inhandle:
        data2 = inhandle.readlines()
    data2[0] = "LOCUS AZZZAA02123456789 " + str(sys.maxsize + 1) + " bp DNA linear PRI 15-OCT-2018\n"
    long_in_tmp = StringIO()
    long_in_tmp.writelines(data2)
    long_in_tmp.seek(0)
    # The parsed record itself is not needed, so it is not bound to a name.
    SeqIO.read(long_in_tmp, 'genbank')
def test_write_read_single(self):
    """Check write/read of an alignment that holds a single sequence."""
    full_alignment = next(ClustalIterator(StringIO(aln_example1)))
    # Now take just the first row as a new alignment:
    alignment = full_alignment[0:1]

    handle = StringIO()
    ClustalWriter(handle).write_file([alignment])
    handle.seek(0)

    expected_length = alignment.get_alignment_length()
    for reread in ClustalIterator(handle):
        self.assertEqual(reread.get_alignment_length(), expected_length)
        self.assertEqual(len(reread), 1)
def test_write_read_single(self):
    """Round-trip a one-row alignment through the Clustal writer and parser."""
    source = StringIO(aln_example1)
    first_alignment = next(ClustalIterator(source))
    # Now take just the first row as a new alignment:
    alignment = first_alignment[0:1]

    out = StringIO()
    writer = ClustalWriter(out)
    writer.write_file([alignment])

    out.seek(0)
    for parsed in ClustalIterator(out):
        # Same number of columns, and exactly one row.
        self.assertEqual(parsed.get_alignment_length(),
                         alignment.get_alignment_length())
        self.assertEqual(len(parsed), 1)
def _write_parse_and_compare(self, read1_records):
    """Write records as SeqXML, parse them back, and compare pairwise."""
    buf = StringIO()
    SeqIO.write(read1_records, buf, "seqxml")
    buf.seek(0)
    read2_records = list(SeqIO.parse(buf, "seqxml"))

    # Same number of records, then equal record by record.
    self.assertEqual(len(read1_records), len(read2_records))
    for before, after in zip(read1_records, read2_records):
        assert_equal_records(self, before, after)
def test_convert(self):
    """Chain conversions newick -> nexus -> phyloxml -> newick and read the result."""
    nexus_buffer = StringIO()
    phyloxml_buffer = BytesIO()  # the phyloxml step uses a bytes buffer
    newick_buffer = StringIO()

    Phylo.convert(EX_NEWICK, 'newick', nexus_buffer, 'nexus')
    nexus_buffer.seek(0)

    Phylo.convert(nexus_buffer, 'nexus', phyloxml_buffer, 'phyloxml')
    phyloxml_buffer.seek(0)

    Phylo.convert(phyloxml_buffer, 'phyloxml', newick_buffer, 'newick')
    newick_buffer.seek(0)

    tree = Phylo.read(newick_buffer, 'newick')
    terminals = tree.get_terminals()
    self.assertEqual(len(terminals), 28)
def _write_parse_and_compare(self, read1_records):
    """Check records are unchanged by a SeqXML write followed by a parse."""
    handle = StringIO()
    SeqIO.write(read1_records, handle, "seqxml")
    handle.seek(0)

    read2_records = []
    for parsed in SeqIO.parse(handle, "seqxml"):
        read2_records.append(parsed)

    self.assertEqual(len(read1_records), len(read2_records))
    pairs = zip(read1_records, read2_records)
    for record1, record2 in pairs:
        assert_equal_records(self, record1, record2)
def test_genbank_date_default(self):
    """Check the date read back when the record has no date annotation."""
    sequence_object = Seq("ATGC", generic_dna)
    # No "date" annotation is set on this record.
    record = SeqRecord(sequence_object,
                       id='123456789',
                       name='UnitTest',
                       description='Test case for date parsing')

    buf = StringIO()
    SeqIO.write(record, buf, 'genbank')
    buf.seek(0)

    reread = SeqIO.read(buf, "gb")
    # 01-JAN-1980 is the date expected when none was supplied.
    self.assertEqual(reread.annotations["date"], "01-JAN-1980")
def test_genbank_date_correct(self):
    """Check a date string set by the user is read back unchanged."""
    user_date = "24-DEC-2015"
    sequence_object = Seq("ATGC", generic_dna)
    record = SeqRecord(sequence_object,
                       id='123456789',
                       name='UnitTest',
                       description='Test case for date parsing')
    record.annotations["date"] = user_date

    buf = StringIO()
    SeqIO.write(record, buf, 'genbank')
    buf.seek(0)

    reread = SeqIO.read(buf, "gb")
    self.assertEqual(reread.annotations["date"], "24-DEC-2015")
def test_genbank_date_datetime(self):
    """Check a datetime date annotation is read back as a GenBank date string."""
    sequence_object = Seq("ATGC", generic_dna)
    record = SeqRecord(sequence_object,
                       id='123456789',
                       name='UnitTest',
                       description='Test case for date parsing')
    # A datetime object rather than a pre-formatted string.
    record.annotations["date"] = datetime(2000, 2, 2)

    buf = StringIO()
    SeqIO.write(record, buf, 'genbank')
    buf.seek(0)

    reread = SeqIO.read(buf, "gb")
    self.assertEqual(reread.annotations["date"], "02-FEB-2000")
def test_convert(self):
    """Convert a tree newick -> nexus -> phyloxml -> newick, then count its tips."""
    as_nexus = StringIO()
    Phylo.convert(EX_NEWICK, 'newick', as_nexus, 'nexus')
    as_nexus.seek(0)

    # The phyloxml output goes to a bytes buffer.
    as_phyloxml = BytesIO()
    Phylo.convert(as_nexus, 'nexus', as_phyloxml, 'phyloxml')
    as_phyloxml.seek(0)

    as_newick = StringIO()
    Phylo.convert(as_phyloxml, 'phyloxml', as_newick, 'newick')
    as_newick.seek(0)

    final_tree = Phylo.read(as_newick, 'newick')
    self.assertEqual(len(final_tree.get_terminals()), 28)
def test_newick_write(self):
    """Write a Newick tree with internal node labels and read it back."""
    # Tree with internal node labels
    source = StringIO("(A,B,(C,D)E)F;")
    tree = Phylo.read(source, "newick")

    mem_file = StringIO()
    Phylo.write(tree, mem_file, "newick")
    mem_file.seek(0)
    tree2 = Phylo.read(mem_file, "newick")

    # Sanity check
    self.assertEqual(tree2.count_terminals(), 4)

    # Check internal node labels were retained
    internal_names = set()
    for clade in tree2.get_nonterminals():
        if clade is not None:
            internal_names.add(clade.name)
    self.assertEqual(internal_names, set(("E", "F")))
def check_rewrite(self, filename):
    """Write an EMBL record out and check the re-read record matches it."""
    old = SeqIO.read(filename, "embl")

    # TODO - Check these properties:
    old.dbxrefs = []
    first_accession_only = old.annotations['accessions'][:1]
    old.annotations['accessions'] = first_accession_only
    del old.annotations['references']

    out = StringIO()
    written = SeqIO.write(old, out, "embl")
    self.assertEqual(1, written)

    out.seek(0)
    new = SeqIO.read(out, "embl")
    self.assertTrue(compare_record(old, new))
def test_TaggingConsumer(self):
    """Check the text TaggingConsumer writes for section and tagged calls."""
    out = StringIO()
    tc = ParserSupport.TaggingConsumer(handle=out, colwidth=5)

    def clear_output():
        # Empty the buffer so the next check sees only new output.
        out.seek(0)
        out.truncate(0)

    tc.start_section()
    self.assertEqual(out.getvalue(), "***** start_section\n")
    clear_output()

    tc.test1("myline")
    self.assertEqual(out.getvalue(), "test1: myline\n")
    clear_output()

    tc.end_section()
    self.assertEqual(out.getvalue(), "***** end_section\n")
def test_illumina_to_sanger(self):
    """Mapping check for FASTQ Illumina (0 to 62) to Sanger (0 to 62)."""
    seq = "N" * 63
    # One quality character per score 0..62, using an offset of 64.
    qual = "".join(chr(64 + q) for q in range(0, 63))
    # Must be a real list: on Python 3 a range object never compares equal
    # to the list stored in letter_annotations.
    expected_phred = list(range(63))
    in_handle = StringIO("@Test\n%s\n+\n%s" % (seq, qual))
    out_handle = StringIO()
    SeqIO.write(SeqIO.parse(in_handle, "fastq-illumina"),
                out_handle, "fastq-sanger")
    out_handle.seek(0)
    record = SeqIO.read(out_handle, "fastq-sanger")
    self.assertEqual(str(record.seq), seq)
    self.assertEqual(record.letter_annotations["phred_quality"],
                     expected_phred)
def check_rewrite(self, filename):
    """Check an EMBL record compares equal after being written and re-read."""
    old = SeqIO.read(filename, "embl")

    # TODO - Check these properties:
    old.dbxrefs = []
    annotations = old.annotations
    annotations['accessions'] = annotations['accessions'][:1]
    del annotations['references']

    handle = StringIO()
    self.assertEqual(1, SeqIO.write(old, handle, "embl"))
    handle.seek(0)

    reread = SeqIO.read(handle, "embl")
    matches = compare_record(old, reread)
    self.assertTrue(matches)
def test_newick_write(self):
    """Check internal node labels survive a Newick write/read cycle."""
    # Tree with internal node labels
    original_tree = Phylo.read(StringIO('(A,B,(C,D)E)F;'), 'newick')
    buf = StringIO()
    Phylo.write(original_tree, buf, 'newick')
    buf.seek(0)
    reread_tree = Phylo.read(buf, 'newick')

    # Sanity check
    self.assertEqual(reread_tree.count_terminals(), 4)

    # Check internal node labels were retained
    nonterminals = [c for c in reread_tree.get_nonterminals() if c is not None]
    internal_names = set(c.name for c in nonterminals)
    self.assertEqual(internal_names, set(('E', 'F')))
def test_illumina_to_sanger(self):
    """Mapping check for FASTQ Illumina (0 to 62) to Sanger (0 to 62)."""
    seq = "N" * 63
    # One quality character per score 0..62, using an offset of 64.
    qual = "".join(chr(64 + q) for q in range(0, 63))
    # list(...) keeps the comparison valid on Python 3, where range()
    # returns a range object that is never equal to a list.
    expected_phred = list(range(63))
    in_handle = StringIO("@Test\n%s\n+\n%s" % (seq, qual))
    out_handle = StringIO()
    SeqIO.write(SeqIO.parse(in_handle, "fastq-illumina"),
                out_handle, "fastq-sanger")
    out_handle.seek(0)
    record = SeqIO.read(out_handle, "fastq-sanger")
    self.assertEqual(str(record.seq), seq)
    self.assertEqual(record.letter_annotations["phred_quality"],
                     expected_phred)
def test_long_names(self):
    """Write and re-read GenBank records whose names stretch the LOCUS line."""
    original = SeqIO.read("GenBank/iro.gb", "gb")
    self.assertEqual(len(original), 1326)
    # Acceptability of LOCUS line with length > 80 invalidates some of these tests
    cases = [
        ("short", 1, True),
        ("max_length_of_16", 1000, True),
        ("overly_long_at_17", 1000, True),
        ("excessively_long_at_22", 99999, True),
        ("excessively_long_at_22", 100000, True),
        ("pushing_the_limits_at_24", 999, True),
        ("pushing_the_limits_at_24", 1000, True),
        ("old_max_name_length_was_26", 10, True),  # 2 digits
        ("old_max_name_length_was_26", 9, True),  # 1 digit
    ]
    for name, seq_len, ok in cases:
        # Make the length match the desired target
        record = original[:]
        # TODO - Implement Seq * int
        record.seq = Seq("N" * seq_len, original.seq.alphabet)
        # Set the identifer to the desired name
        record.id = name
        record.name = name
        # Attempt to output the record...
        if not ok:
            # e.g. ValueError: Locus identifier 'excessively_long_at_22' is too long
            self.assertRaises(ValueError, record.format, "gb")
            continue

        handle = StringIO()
        with warnings.catch_warnings():
            # e.g. BiopythonWarning: Stealing space from length field to
            # allow long name in LOCUS line
            warnings.simplefilter("ignore", BiopythonWarning)
            # output = record.format("gb")
            written = SeqIO.write(record, handle, "gb")
        self.assertEqual(1, written)

        # Inspect the first line written.
        handle.seek(0)
        line = handle.readline()
        self.assertIn(" %s " % name, line)
        self.assertIn(" %i bp " % seq_len, line)

        # Splitting based on whitespace rather than position due to
        # updated GenBank specification
        fields = line.split()
        name_and_length = fields[1:3]
        self.assertEqual(name_and_length, [name, str(seq_len)], line)

        handle.seek(0)
        with warnings.catch_warnings():
            # e.g. BiopythonParserWarning: GenBank LOCUS line
            # identifier over 16 characters
            warnings.simplefilter("ignore", BiopythonWarning)
            new = SeqIO.read(handle, "gb")
        self.assertEqual(name, new.name)
        self.assertEqual(seq_len, len(new))
def test_write_read(self):
    """Check ids and sequences survive a Mauve (XMFA) write/read cycle."""
    # The context manager closes the file even if parsing raises.
    with open(self.SIMPLE_XMFA, 'r') as handle:
        aln_list = list(MauveIterator(handle))

    handle = StringIO()
    MauveWriter(handle).write_file(aln_list)
    handle.seek(0)
    aln_list_out = list(MauveIterator(handle))

    # Compare each alignment, then each record within it.
    for a1, a2 in zip(aln_list, aln_list_out):
        self.assertEqual(len(a1), len(a2))
        for r1, r2 in zip(a1, a2):
            self.assertEqual(r1.id, r2.id)
            self.assertEqual(str(r1.seq), str(r2.seq))
def test_write_read(self):
    """Write parsed Mauve alignments out and compare them after re-reading."""
    # ``with`` guarantees the input file is closed if MauveIterator fails.
    with open(self.SIMPLE_XMFA, 'r') as in_handle:
        aln_list = list(MauveIterator(in_handle))

    out_handle = StringIO()
    MauveWriter(out_handle).write_file(aln_list)
    out_handle.seek(0)
    aln_list_out = list(MauveIterator(out_handle))

    for a1, a2 in zip(aln_list, aln_list_out):
        # Same number of records per alignment ...
        self.assertEqual(len(a1), len(a2))
        # ... and identical id and sequence for each record.
        for r1, r2 in zip(a1, a2):
            self.assertEqual(r1.id, r2.id)
            self.assertEqual(str(r1.seq), str(r2.seq))
def test_write_read(self):
    """Check ids and sequences survive a PHYLIP write/read cycle."""
    source = StringIO(phylip_text5a)
    list5 = list(PhylipIterator(source))
    source.close()

    out = StringIO()
    PhylipWriter(out).write_file(list5)
    out.seek(0)
    list6 = list(PhylipIterator(out))

    self.assertEqual(len(list5), len(list6))
    for before, after in zip(list5, list6):
        self.assertEqual(len(before), len(after))
        # Records are compared by id and by sequence text.
        for rec_before, rec_after in zip(before, after):
            self.assertEqual(rec_before.id, rec_after.id)
            self.assertEqual(str(rec_before.seq), str(rec_after.seq))
def test_write_read(self):
    """Write parsed PHYLIP alignments out and compare them after re-reading."""
    handle = StringIO(phylip_text5a)
    parsed_first = [aln for aln in PhylipIterator(handle)]
    handle.close()

    handle = StringIO()
    writer = PhylipWriter(handle)
    writer.write_file(parsed_first)
    handle.seek(0)
    parsed_second = [aln for aln in PhylipIterator(handle)]

    # Same number of alignments, rows per alignment, ids and sequences.
    self.assertEqual(len(parsed_first), len(parsed_second))
    for index, a1 in enumerate(parsed_first):
        a2 = parsed_second[index]
        self.assertEqual(len(a1), len(a2))
        for r1, r2 in zip(a1, a2):
            self.assertEqual(r1.id, r2.id)
            self.assertEqual(str(r1.seq), str(r2.seq))
def check(self, sff_name, sff_format, out_name, format):
    """Convert an SFF file and compare the result with a reference file.

    ``out_name`` is parsed as ``format`` to get the expected records;
    ``sff_name`` is converted from ``sff_format`` to ``format`` in memory
    and the converted records are compared against them.
    """
    wanted = list(SeqIO.parse(out_name, format))
    data = StringIO()
    count = SeqIO.convert(sff_name, sff_format, data, format)
    self.assertEqual(count, len(wanted))
    data.seek(0)
    converted = list(SeqIO.parse(data, format))
    self.assertEqual(len(wanted), len(converted))
    for old, new in zip(wanted, converted):
        self.assertEqual(old.id, new.id)
        self.assertEqual(old.name, new.name)
        # Two independent checks: with ``elif`` the quality comparison was
        # only reachable for "qual", so a format carrying both sequence
        # and qualities never had its qualities compared.
        if format != "qual":
            self.assertEqual(str(old.seq), str(new.seq))
        if format != "fasta":
            self.assertEqual(old.letter_annotations["phred_quality"],
                             new.letter_annotations["phred_quality"])
def check(self, sff_name, sff_format, out_name, format):
    """Convert an SFF file and compare the result with a reference file.

    ``out_name`` is parsed as ``format`` to get the expected records;
    ``sff_name`` is converted from ``sff_format`` to ``format`` in memory
    and the converted records are compared against them.
    """
    wanted = list(SeqIO.parse(out_name, format))
    data = StringIO()
    count = SeqIO.convert(sff_name, sff_format, data, format)
    self.assertEqual(count, len(wanted))
    data.seek(0)
    converted = list(SeqIO.parse(data, format))
    self.assertEqual(len(wanted), len(converted))
    for old, new in zip(wanted, converted):
        self.assertEqual(old.id, new.id)
        self.assertEqual(old.name, new.name)
        # Sequence and quality are checked independently. The former
        # ``elif`` only ran for "qual", so formats with both sequence and
        # qualities skipped the quality comparison.
        if format != "qual":
            self.assertEqual(str(old.seq), str(new.seq))
        if format != "fasta":
            self.assertEqual(old.letter_annotations["phred_quality"],
                             new.letter_annotations["phred_quality"])
def test_newick_write(self):
    """Check a labelled Newick tree keeps its internal names when rewritten."""
    # Tree with internal node labels
    newick_text = StringIO("(A,B,(C,D)E)F;")
    tree = Phylo.read(newick_text, "newick")

    mem_file = StringIO()
    Phylo.write(tree, mem_file, "newick")
    mem_file.seek(0)
    tree2 = Phylo.read(mem_file, "newick")

    # Sanity check
    terminal_count = tree2.count_terminals()
    self.assertEqual(terminal_count, 4)

    # Check internal node labels were retained
    internal_names = set()
    for node in tree2.get_nonterminals():
        if node is None:
            continue
        internal_names.add(node.name)
    self.assertEqual(internal_names, {"E", "F"})
def test_sanger_to_illumina(self):
    """Check FASTQ Sanger (0 to 93) maps onto Illumina (0 to 62)."""
    scores = list(range(94))
    seq = "N" * len(scores)
    # Sanger encoding: each score is stored as chr(33 + score).
    qual = "".join([chr(33 + score) for score in scores])
    # Scores above 62 are expected to come back capped at 62.
    expected_phred = [min(62, score) for score in scores]

    fastq_text = "@Test\n%s\n+\n%s" % (seq, qual)
    in_handle = StringIO(fastq_text)
    out_handle = StringIO()
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always", BiopythonWarning)
        records = SeqIO.parse(in_handle, "fastq-sanger")
        SeqIO.write(records, out_handle, "fastq-illumina")
        # At most one warning may be recorded for the whole conversion.
        self.assertTrue(len(caught) <= 1, caught)

    out_handle.seek(0)
    record = SeqIO.read(out_handle, "fastq-illumina")
    self.assertEqual(str(record.seq), seq)
    self.assertEqual(record.letter_annotations["phred_quality"],
                     expected_phred)
def test_sanger_to_illumina(self):
    """Verify the Sanger (0 to 93) to Illumina (0 to 62) FASTQ mapping."""
    phred_values = list(range(0, 94))
    seq = "N" * 94
    qual_chars = [chr(33 + value) for value in phred_values]
    qual = "".join(qual_chars)
    # Anything above 62 should be written (and read back) as 62.
    expected_phred = [value if value < 62 else 62 for value in phred_values]

    in_handle = StringIO("@Test\n%s\n+\n%s" % (seq, qual))
    out_handle = StringIO()
    with warnings.catch_warnings(record=True) as recorded:
        # Record every BiopythonWarning instead of letting filters hide it.
        warnings.simplefilter("always", BiopythonWarning)
        SeqIO.write(
            SeqIO.parse(in_handle, "fastq-sanger"),
            out_handle,
            "fastq-illumina",
        )
        self.assertTrue(len(recorded) <= 1, recorded)

    out_handle.seek(0)
    record = SeqIO.read(out_handle, "fastq-illumina")
    self.assertEqual(str(record.seq), seq)
    self.assertEqual(record.letter_annotations["phred_quality"],
                     expected_phred)
def test_sanger_to_illumina(self):
    """Mapping check for FASTQ Sanger (0 to 93) to Illumina (0 to 62)"""
    seq = "N" * 94
    qual = "".join(chr(33 + q) for q in range(0, 94))
    # Scores above 62 are expected to be capped at 62 on output.
    expected_phred = [min(62, q) for q in range(0, 94)]
    in_handle = StringIO("@Test\n%s\n+\n%s" % (seq, qual))
    out_handle = StringIO()
    # Want to ignore the data loss warning.
    # simplefilter() inserts its entry at the FRONT of warnings.filters,
    # so it cannot be undone by popping the last entry; catch_warnings()
    # saves and restores the whole filter state, even if the write raises.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", BiopythonWarning)
        SeqIO.write(SeqIO.parse(in_handle, "fastq-sanger"),
                    out_handle, "fastq-illumina")
    out_handle.seek(0)
    record = SeqIO.read(out_handle, "fastq-illumina")
    self.assertEqual(str(record.seq), seq)
    self.assertEqual(record.letter_annotations["phred_quality"],
                     expected_phred)
def test_sanger_to_illumina(self):
    """Mapping check for FASTQ Sanger (0 to 93) to Illumina (0 to 62)"""
    seq = "N" * 94
    qual = "".join(chr(33 + q) for q in range(0, 94))
    # Illumina output is expected to cap each score at 62.
    expected_phred = [min(62, q) for q in range(0, 94)]
    in_handle = StringIO("@Test\n%s\n+\n%s" % (seq, qual))
    out_handle = StringIO()
    # Want to ignore the data loss warning.
    # The filter is installed inside catch_warnings() so the global filter
    # list is restored exactly on exit: simplefilter() prepends its entry,
    # which means popping the end of warnings.filters would remove an
    # unrelated filter and leave the "ignore" rule active for later tests.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", BiopythonWarning)
        SeqIO.write(SeqIO.parse(in_handle, "fastq-sanger"),
                    out_handle, "fastq-illumina")
    out_handle.seek(0)
    record = SeqIO.read(out_handle, "fastq-illumina")
    self.assertEqual(str(record.seq), seq)
    self.assertEqual(record.letter_annotations["phred_quality"],
                     expected_phred)
def test_solexa_to_sanger(self):
    """Check FASTQ Solexa (-5 to 62) maps onto Sanger (0 to 62)."""
    # The writer is reported to use a cached mapping table for speed
    # rather than calling the conversion function per score, so the
    # expected values are computed here independently with
    # QualityIO.phred_quality_from_solexa.
    solexa_scores = list(range(-5, 63))
    seq = "N" * 68
    # Solexa encoding: each score is stored as chr(64 + score).
    qual = "".join([chr(64 + score) for score in solexa_scores])
    expected_phred = []
    for score in solexa_scores:
        expected_phred.append(round(QualityIO.phred_quality_from_solexa(score)))

    fastq_text = "@Test\n%s\n+\n%s" % (seq, qual)
    in_handle = StringIO(fastq_text)
    out_handle = StringIO()
    records = SeqIO.parse(in_handle, "fastq-solexa")
    SeqIO.write(records, out_handle, "fastq-sanger")

    out_handle.seek(0)
    record = SeqIO.read(out_handle, "fastq-sanger")
    self.assertEqual(str(record.seq), seq)
    self.assertEqual(record.letter_annotations["phred_quality"],
                     expected_phred)
def test_solexa_to_sanger(self):
    """Verify the Solexa (-5 to 62) to Sanger (0 to 62) FASTQ mapping."""
    # Per the original note on this test, the writing code relies on a
    # cached dictionary of mappings instead of calling the conversion
    # function directly; the reference values below come straight from
    # QualityIO.phred_quality_from_solexa.
    quality_range = list(range(-5, 63))
    seq = "N" * 68
    encoded = [chr(64 + value) for value in quality_range]
    qual = "".join(encoded)
    expected_phred = [
        round(QualityIO.phred_quality_from_solexa(value))
        for value in quality_range
    ]

    in_handle = StringIO("@Test\n%s\n+\n%s" % (seq, qual))
    out_handle = StringIO()
    SeqIO.write(
        SeqIO.parse(in_handle, "fastq-solexa"),
        out_handle,
        "fastq-sanger",
    )

    out_handle.seek(0)
    record = SeqIO.read(out_handle, "fastq-sanger")
    self.assertEqual(str(record.seq), seq)
    self.assertEqual(record.letter_annotations["phred_quality"],
                     expected_phred)
def test_empty_file(self):
    """Read an empty handle, then write one and two alignments.

    Checks that NexusIterator yields nothing for an empty handle, prints
    the output of writing a single item, and checks that NexusWriter
    raises ValueError when given two.

    NOTE(review): ``a`` is not defined inside this method; it is presumably
    a module-level alignment object - confirm it exists at import time.
    """
    print("Reading an empty file")
    # unittest assertion instead of a bare ``assert`` so the check is not
    # stripped when Python runs with -O.
    self.assertEqual(0, len(list(NexusIterator(StringIO()))))
    print("Done")
    print("")
    print("Writing...")
    handle = StringIO()
    NexusWriter(handle).write_file([a])
    handle.seek(0)
    print(handle.read())
    handle = StringIO()
    try:
        NexusWriter(handle).write_file([a, a])
    except ValueError:
        # Expected: more than one alignment must be rejected.
        pass
    else:
        self.fail("Should have rejected more than one alignment!")
def test_multiple_output(self):
    """Write one alignment with NexusWriter; two must raise ValueError.

    Builds a three-record MultipleSeqAlignment, checks the text written
    for a single alignment starts with the "#NEXUS" data-block header and
    ends with "end;", then checks that passing the alignment twice is
    rejected.
    """
    records = [
        SeqRecord(Seq("ATGCTGCTGAT", alphabet=ambiguous_dna), id="foo"),
        SeqRecord(Seq("ATGCTGCAGAT", alphabet=ambiguous_dna), id="bar"),
        SeqRecord(Seq("ATGCTGCGGAT", alphabet=ambiguous_dna), id="baz"),
    ]
    a = MultipleSeqAlignment(records, alphabet=ambiguous_dna)

    handle = StringIO()
    NexusWriter(handle).write_file([a])
    handle.seek(0)
    data = handle.read()
    self.assertTrue(data.startswith("#NEXUS\nbegin data;\n"), data)
    self.assertTrue(data.endswith("end;\n"), data)

    handle = StringIO()
    try:
        NexusWriter(handle).write_file([a, a])
    except ValueError:
        # Expected: more than one alignment must be rejected.
        pass
    else:
        # self.fail() rather than ``assert False`` so the check still
        # runs when Python is started with -O.
        self.fail("Should have rejected more than one alignment!")