Exemple #1
0
 def test_save(self):
     """Check basic trie access and approximate lookup, then a save/load round trip."""
     trieobj = trie.trie()
     trieobj["foo"] = 1
     # Wrap in list() so the comparisons with list literals do not rely
     # on keys()/values() handing back an actual list object.
     k = list(trieobj.keys())
     self.assertEqual(k, ["foo"])
     v = list(trieobj.values())
     self.assertEqual(v, [1])
     self.assertEqual(trieobj.get("bar", 99), 99)
     trieobj["hello"] = "55a"
     self.assertEqual(trieobj.get_approximate("foo", 0), [("foo", 1, 0)])
     self.assertEqual(trieobj.get_approximate("foo", 1), [("foo", 1, 0)])
     self.assertEqual(trieobj.get_approximate("foa", 0), [])
     self.assertEqual(trieobj.get_approximate("foa", 1), [("foo", 1, 1)])
     x = sorted(trieobj.get_approximate("foa", 2))
     self.assertEqual(x, [("foo", 1, 1), ("foo", 1, 2), ("foo", 1, 2)])
     # foo  foo-  foo-
     # foa  f-oa  fo-a
     # mismatch a->o
     # insertion after f, deletion of o
     # insertion after o, deletion of o
     x = trieobj.get_approximate("foo", 4)
     # Count how many times each (key, value, mismatches) hit is reported.
     y = {}
     for z in x:
         y[z] = y.get(z, 0) + 1
     x = sorted(y.items())
     self.assertEqual(x, [(("foo", 1, 0), 1), (("hello", "55a", 4), 6)])
     # Round trip through save/load using an in-memory handle.
     h = StringIO()
     trie.save(h, trieobj)
     h.seek(0)
     trieobj = trie.load(h)
     k = list(trieobj.keys())
     self.assertIn("foo", k)
     self.assertIn("hello", k)
     self.assertEqual(repr(trieobj["foo"]), "1")
     self.assertEqual(repr(trieobj["hello"]), "'55a'")
 def test_write_species(self):
     """Test writing species from annotation tags."""
     record = SeqIO.read("SwissProt/sp016", "swiss")
     self.assertEqual(record.annotations["organism"], "Homo sapiens (Human)")
     self.assertEqual(record.annotations["ncbi_taxid"], ["9606"])
     handle = StringIO()
     SeqIO.write(record, handle, "seqxml")
     handle.seek(0)
     output = handle.getvalue()
     self.assertIn("Homo sapiens (Human)", output)
     self.assertIn("9606", output)
     # Any of these spellings of the <species> element is acceptable;
     # attribute order and empty-element style differ between platforms.
     acceptable_tags = (
         # Good, but don't get this (do we?)
         '<species name="Homo sapiens (Human)" ncbiTaxID="9606"/>',
         # Not as concise, but fine (seen on C Python)
         '<species name="Homo sapiens (Human)" ncbiTaxID="9606"></species>',
         # Jython uses a different order
         '<species ncbiTaxID="9606" name="Homo sapiens (Human)"></species>',
         # This would be fine too, but don't get this (do we?)
         '<species ncbiTaxID="9606" name="Homo sapiens (Human)"/>',
     )
     if not any(tag in output for tag in acceptable_tags):
         raise ValueError("Missing expected <species> tag: %r" % output)
    def test_phenotype_IO(self):
        """Test basic functionalities of phenotype IO methods."""
        p1 = phenotype.read(SMALL_JSON_PLATE, 'pm-json')
        p2 = next(phenotype.parse(SMALL_CSV_PLATES, 'pm-csv'))

        handle = StringIO()

        c = phenotype.write([p1, p2], handle, 'pm-json')
        self.assertEqual(c, 2)

        handle.flush()
        handle.seek(0)
        # Now ready to read back from the handle...
        try:
            records = list(phenotype.parse(handle, 'pm-json'))
        except ValueError as e:
            # This is BAD.  We can't read our own output.
            # I want to see the output when called from the test harness,
            # run_tests.py (which can be funny about new lines on Windows)
            #
            # ``records`` is never bound when parse() raises, so it must not
            # be formatted here; doing so raised UnboundLocalError and hid
            # the real parsing error.
            handle.seek(0)
            raise ValueError("%s\n\n%s"
                             % (str(e), repr(handle.read())))

        self.assertEqual(p1, records[0])

        handle.close()
        # Invalid format arguments must be rejected.
        handle = StringIO()
        self.assertRaises(TypeError, phenotype.write, p1, handle, 1)
        self.assertRaises(ValueError, phenotype.write, p1, handle, 'PM-JSON')
        self.assertRaises(ValueError, phenotype.write, p1, handle, 'pm-csv')
        handle.close()
 def test_generated(self):
     """Write and read back odd SeqRecord objects."""
     record1 = SeqRecord(Seq("ACGT"*500, generic_dna),  id="Test", description="Long "*500,
                        letter_annotations={"phred_quality":[40,30,20,10]*500})
     record2 = SeqRecord(MutableSeq("NGGC"*1000),  id="Mut", description="very "*1000+"long",
                        letter_annotations={"phred_quality":[0,5,5,10]*1000})
     record3 = SeqRecord(UnknownSeq(2000,character="N"),  id="Unk", description="l"+("o"*1000)+"ng",
                        letter_annotations={"phred_quality":[0,1]*1000})
     record4 = SeqRecord(Seq("ACGT"*500),  id="no_descr", description="", name="",
                        letter_annotations={"phred_quality":[40,50,60,62]*500})
     record5 = SeqRecord(Seq("",generic_dna),  id="empty_p", description="(could have been trimmed lots)",
                        letter_annotations={"phred_quality":[]})
     record6 = SeqRecord(Seq(""),  id="empty_s", description="(could have been trimmed lots)",
                        letter_annotations={"solexa_quality":[]})
     record7 = SeqRecord(Seq("ACNN"*500),  id="Test_Sol", description="Long "*500,
                        letter_annotations={"solexa_quality":[40,30,0,-5]*500})
     record8 = SeqRecord(Seq("ACGT"),  id="HighQual", description="With very large qualities that even Sanger FASTQ can't hold!",
                        letter_annotations={"solexa_quality":[0,10,100,1000]})
     #TODO - Record with no identifier?
     records = [record1, record2, record3, record4, record5, record6, record7, record8]
     #TODO - Have a Biopython defined "DataLossWarning?"
     #TODO - Include phd output?
     # catch_warnings() restores the filter list on exit, even if a check
     # below raises.  The previous simplefilter()/filters.pop() pairing
     # removed the *last* filter while simplefilter() inserts at the front,
     # so it dropped an unrelated filter and left the "ignore" in place.
     with warnings.catch_warnings():
         warnings.simplefilter('ignore', BiopythonWarning)
         for fmt in ["fasta", "fastq", "fastq-solexa", "fastq-illumina", "qual"]:
             handle = StringIO()
             SeqIO.write(records, handle, fmt)
             handle.seek(0)
             compare_records(records,
                             list(SeqIO.parse(handle, fmt)),
                             truncation_expected(fmt))
    def test_genbank_date_list(self):
        """Round-trip records whose date annotation is a list through GenBank.

        A one-element list is expected to come back as that date; a
        two-element list is expected to come back as "01-JAN-1980".
        """
        sequence_object = Seq("ATGC", generic_dna)
        cases = [
            (["24-DEC-2015"], "24-DEC-2015"),
            (["24-DEC-2015", "25-JAN-2016"], "01-JAN-1980"),
        ]
        for date_list, expected in cases:
            record = SeqRecord(sequence_object,
                               id='123456789',
                               name='UnitTest',
                               description='Test case for date parsing')
            record.annotations["date"] = date_list
            handle = StringIO()
            SeqIO.write(record, handle, 'genbank')
            handle.seek(0)
            gb = SeqIO.read(handle, "gb")
            self.assertEqual(gb.annotations["date"], expected)
Exemple #6
0
 def __str__(self):
     """Return the text that save() writes for this MarkovModel object."""
     from Bio._py3k import StringIO
     buf = StringIO()
     save(self, buf)
     # getvalue() returns the whole buffer regardless of the current position.
     return buf.getvalue()
 def test_generated(self):
     """Write unusual SeqRecord objects in several formats and parse them back."""
     # TODO - Record with no identifier?
     records = [
         SeqRecord(Seq("ACGT" * 500, generic_dna), id="Test", description="Long " * 500,
                   letter_annotations={"phred_quality": [40, 30, 20, 10] * 500}),
         SeqRecord(MutableSeq("NGGC" * 1000), id="Mut", description="very " * 1000 + "long",
                   letter_annotations={"phred_quality": [0, 5, 5, 10] * 1000}),
         SeqRecord(UnknownSeq(2000, character="N"), id="Unk", description="l" + ("o" * 1000) + "ng",
                   letter_annotations={"phred_quality": [0, 1] * 1000}),
         SeqRecord(Seq("ACGT" * 500), id="no_descr", description="", name="",
                   letter_annotations={"phred_quality": [40, 50, 60, 62] * 500}),
         SeqRecord(Seq("", generic_dna), id="empty_p", description="(could have been trimmed lots)",
                   letter_annotations={"phred_quality": []}),
         SeqRecord(Seq(""), id="empty_s", description="(could have been trimmed lots)",
                   letter_annotations={"solexa_quality": []}),
         SeqRecord(Seq("ACNN" * 500), id="Test_Sol", description="Long " * 500,
                   letter_annotations={"solexa_quality": [40, 30, 0, -5] * 500}),
         SeqRecord(Seq("ACGT"), id="HighQual", description="With very large qualities that even Sanger FASTQ can't hold!",
                   letter_annotations={"solexa_quality": [0, 10, 100, 1000]}),
     ]
     for fmt in ["fasta", "fastq", "fastq-solexa", "fastq-illumina", "qual"]:
         handle = StringIO()
         # Only the write step runs with BiopythonWarning silenced.
         with warnings.catch_warnings():
             # TODO - Have a Biopython defined "DataLossWarning?"
             warnings.simplefilter('ignore', BiopythonWarning)
             SeqIO.write(records, handle, fmt)
         handle.seek(0)
         parsed = list(SeqIO.parse(handle, fmt))
         compare_records(records, parsed, truncation_expected(fmt))
def check_convert_fails(in_filename, in_format, out_format, alphabet=None):
    """Check SeqIO.convert fails with the same error as parse followed by write.

    Both routes must raise ValueError and the two messages must be equal.

    Raises:
        AssertionError: if either route does not raise ValueError, or the
            two error messages differ.
    """
    qual_truncate = truncation_expected(out_format)
    # We want the SAME error message from parse/write as convert!
    err1 = None
    try:
        records = list(SeqIO.parse(in_filename, in_format, alphabet))
        handle = StringIO()
        # catch_warnings() restores the filters even though write() is
        # expected to raise.  The old simplefilter()/filters.pop() pairing
        # skipped the pop on that path and popped the wrong end of the list.
        with warnings.catch_warnings():
            if qual_truncate:
                warnings.simplefilter('ignore', UserWarning)
            SeqIO.write(records, handle, out_format)
    except ValueError as err:
        err1 = err
    else:
        # Explicit raise rather than ``assert`` so it still fires under -O.
        raise AssertionError("Parse or write should have failed!")
    # Now do the conversion...
    try:
        handle2 = StringIO()
        with warnings.catch_warnings():
            if qual_truncate:
                warnings.simplefilter('ignore', UserWarning)
            SeqIO.convert(in_filename, in_format, handle2, out_format, alphabet)
    except ValueError as err2:
        if str(err1) != str(err2):
            raise AssertionError(
                "Different failures, parse/write:\n%s\nconvert:\n%s"
                % (err1, err2))
    else:
        raise AssertionError("Convert should have failed!")
    def test_genbank_date_list(self):
        """Check how list-valued date annotations survive a GenBank round trip."""
        sequence_object = Seq("ATGC", generic_dna)

        def round_trip(date_value):
            """Write a record annotated with ``date_value`` and return the parsed date."""
            record = SeqRecord(sequence_object,
                               id='123456789',
                               name='UnitTest',
                               description='Test case for date parsing')
            record.annotations["date"] = date_value
            handle = StringIO()
            SeqIO.write(record, handle, 'genbank')
            handle.seek(0)
            gb = SeqIO.read(handle, "gb")
            return gb.annotations["date"]

        # A single-entry list comes back as that date.
        self.assertEqual(round_trip(["24-DEC-2015"]), "24-DEC-2015")
        # A list with two entries is expected to come back as "01-JAN-1980".
        self.assertEqual(round_trip(["24-DEC-2015", "25-JAN-2016"]), "01-JAN-1980")
Exemple #10
0
    def test_000_write_invalid_but_parsed_locus_line(self):
        """Make sure we survive writing slightly invalid LOCUS lines we could parse."""
        # Start from a valid file...
        source_path = path.join('GenBank', 'NC_005816.gb')
        with open(source_path, 'r') as handle:
            lines = handle.readlines()

        # ...and swap in a LOCUS line whose molecule type is lower case.
        invalid_line = "LOCUS       NC_005816               9609 bp    dna     circular BCT 21-JUL-2008\n"
        lines[0] = invalid_line
        fake_handle = StringIO("".join(lines))

        # Parsing this must raise exactly one warning.
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always")
            rec = SeqIO.read(fake_handle, 'genbank')
        self.assertEqual(len(caught), 1)
        first_warning = caught[0]
        self.assertEqual(first_warning.category, BiopythonParserWarning)
        self.assertEqual(str(first_warning.message), "Non-upper case molecule type in LOCUS line: dna")

        # Writing the record must reproduce the LOCUS line unchanged.
        out_handle = StringIO()
        written = SeqIO.write([rec], out_handle, 'genbank')
        self.assertEqual(written, 1)

        out_handle.seek(0)
        first_out_line = out_handle.readlines()[0]
        self.assertEqual(first_out_line, invalid_line)
Exemple #11
0
 def test_write_species(self):
     """Test writing species from annotation tags."""
     record = SeqIO.read("SwissProt/sp016", "swiss")
     self.assertEqual(record.annotations["organism"],
                      "Homo sapiens (Human)")
     self.assertEqual(record.annotations["ncbi_taxid"], ["9606"])
     handle = StringIO()
     SeqIO.write(record, handle, "seqxml")
     handle.seek(0)
     output = handle.getvalue()
     self.assertIn("Homo sapiens (Human)", output)
     self.assertIn("9606", output)
     # The <species> element may be written with either attribute order and
     # either as an empty-element tag or with an explicit closing tag.
     for tag in (
             # Good, but don't get this (do we?)
             '<species name="Homo sapiens (Human)" ncbiTaxID="9606"/>',
             # Not as concise, but fine (seen on C Python)
             '<species name="Homo sapiens (Human)" ncbiTaxID="9606"></species>',
             # Jython uses a different order
             '<species ncbiTaxID="9606" name="Homo sapiens (Human)"></species>',
             # This would be fine too, but don't get this (do we?)
             '<species ncbiTaxID="9606" name="Homo sapiens (Human)"/>'):
         if tag in output:
             break
     else:
         raise ValueError("Missing expected <species> tag: %r" % output)
    def test_000_write_invalid_but_parsed_locus_line(self):
        """Check a parsed-but-invalid LOCUS line is written back unchanged."""
        invalid_line = "LOCUS       NC_005816               9609 bp    dna     circular BCT 21-JUL-2008\n"

        # Take a valid file and replace its LOCUS line with one whose
        # molecule type is lower case.
        with open(path.join('GenBank', 'NC_005816.gb'), 'r') as handle:
            lines = handle.readlines()
        lines[0] = invalid_line
        fake_handle = StringIO("".join(lines))

        # Record every warning so the one from the parser can be inspected.
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always")
            rec = SeqIO.read(fake_handle, 'genbank')
        self.assertEqual(len(caught), 1)
        self.assertEqual(caught[0].category, BiopythonParserWarning)
        self.assertEqual(str(caught[0].message), "Non-upper case molecule type in LOCUS line: dna")

        out_handle = StringIO()
        self.assertEqual(SeqIO.write([rec], out_handle, 'genbank'), 1)

        out_handle.seek(0)
        out_lines = out_handle.readlines()
        self.assertEqual(out_lines[0], invalid_line)
Exemple #13
0
def check_convert_fails(in_filename, in_format, out_format, alphabet=None):
    """Check SeqIO.convert fails with the same error as parse followed by write.

    Both routes must raise ValueError and the two messages must be equal.

    Raises:
        AssertionError: if either route does not raise ValueError, or the
            two error messages differ.
    """
    qual_truncate = truncation_expected(out_format)
    # We want the SAME error message from parse/write as convert!
    err1 = None
    try:
        records = list(SeqIO.parse(in_filename, in_format, alphabet))
        handle = StringIO()
        with warnings.catch_warnings():
            if qual_truncate:
                warnings.simplefilter("ignore", BiopythonWarning)
            SeqIO.write(records, handle, out_format)
    except ValueError as err:
        err1 = err
    else:
        # Raised outside the ``try`` so the "did not fail" signal is not
        # swallowed by the ValueError handler and reported later as a
        # confusing "Different failures" mismatch.
        raise AssertionError("Parse or write should have failed!")
    # Now do the conversion...
    try:
        handle2 = StringIO()
        with warnings.catch_warnings():
            if qual_truncate:
                warnings.simplefilter("ignore", BiopythonWarning)
            SeqIO.convert(in_filename, in_format, handle2, out_format,
                          alphabet)
    except ValueError as err2:
        # Explicit raise rather than ``assert`` so it still fires under -O.
        if str(err1) != str(err2):
            raise AssertionError(
                "Different failures, parse/write:\n%s\nconvert:\n%s"
                % (err1, err2))
    else:
        raise AssertionError("Convert should have failed!")
Exemple #14
0
 def test_save(self):
     """Exercise trie lookups and approximate matching, then save and reload the trie."""
     trieobj = trie.trie()
     trieobj["foo"] = 1
     self.assertEqual(list(trieobj.keys()), ["foo"])
     self.assertEqual(list(trieobj.values()), [1])
     self.assertEqual(trieobj.get("bar", 99), 99)
     trieobj["hello"] = '55a'
     self.assertEqual(trieobj.get_approximate("foo", 0), [("foo", 1, 0)])
     self.assertEqual(trieobj.get_approximate("foo", 1), [("foo", 1, 0)])
     self.assertEqual(trieobj.get_approximate("foa", 0), [])
     self.assertEqual(trieobj.get_approximate("foa", 1), [("foo", 1, 1)])
     # foo  foo-  foo-
     # foa  f-oa  fo-a
     # mismatch a->o
     # insertion after f, deletion of o
     # insertion after o, deletion of o
     hits = sorted(trieobj.get_approximate("foa", 2))
     self.assertEqual(hits, [("foo", 1, 1), ("foo", 1, 2), ("foo", 1, 2)])
     # Tally how often each distinct hit is reported.
     tally = {}
     for hit in trieobj.get_approximate("foo", 4):
         tally[hit] = tally.get(hit, 0) + 1
     self.assertEqual(sorted(tally.items()),
                      [(('foo', 1, 0), 1), (('hello', '55a', 4), 6)])
     # Save to an in-memory handle and load it back.
     h = StringIO()
     trie.save(h, trieobj)
     h.seek(0)
     reloaded = trie.load(h)
     reloaded_keys = list(reloaded.keys())
     self.assertTrue("foo" in reloaded_keys)
     self.assertTrue("hello" in reloaded_keys)
     self.assertEqual(repr(reloaded["foo"]), '1')
     self.assertEqual(repr(reloaded["hello"]), "'55a'")
Exemple #15
0
 def loop(self, filename, format):
     """Load a file into BioSQL, read it back, and round-trip it through GenBank.

     filename - path of the sequence file to parse.
     format - SeqIO format name used to parse ``filename``.
     """
     # Pass the filename so SeqIO opens and closes the file itself.  The
     # previous open(filename, "rU") handle was never closed, and the "U"
     # mode flag is rejected by newer Python versions.
     original_records = list(SeqIO.parse(filename, format))
     # now open a connection to load the database
     server = BioSeqDatabase.open_database(driver = DBDRIVER,
                                           user = DBUSER, passwd = DBPASSWD,
                                           host = DBHOST, db = TESTDB)
     # Close the connection even when one of the checks below fails.
     try:
         db_name = "test_loop_%s" % filename  # new namespace!
         db = server.new_database(db_name)
         count = db.load(original_records)
         self.assertEqual(count, len(original_records))
         server.commit()
         #Now read them back...
         biosql_records = [db.lookup(name=rec.name)
                           for rec in original_records]
         #And check they agree
         self.assertTrue(compare_records(original_records, biosql_records))
         #Now write to a handle...
         handle = StringIO()
         SeqIO.write(biosql_records, handle, "gb")
         #Now read them back...
         handle.seek(0)
         new_records = list(SeqIO.parse(handle, "gb"))
         #And check they still agree
         self.assertEqual(len(new_records), len(original_records))
         for old, new in zip(original_records, new_records):
             #TODO - remove this hack because we don't yet write these (yet):
             for key in ["comment", "references", "db_source"]:
                 if key in old.annotations and key not in new.annotations:
                     del old.annotations[key]
             self.assertTrue(compare_record(old, new))
     finally:
         #Done
         server.close()
Exemple #16
0
 def __str__(self):
     """Return the output of save() for this MarkovModel object as a string."""
     from Bio._py3k import StringIO
     out = StringIO()
     save(self, out)
     # Rewind so read() returns everything save() wrote.
     out.seek(0)
     text = out.read()
     return text
    def test_save_and_load(self):
        """Save a MarkovModel to a handle and check the loaded copy matches it."""
        states = "NR"
        alphabet = "AGTC"
        p_initial = array([1.0, 0.0])
        p_transition = array([[0.75, 0.25], [0.25, 0.75]])
        p_emission = array(
            [[0.45, 0.36, 0.06, 0.13], [0.24, 0.18, 0.12, 0.46]])
        saved_model = MarkovModel.MarkovModel(
            states, alphabet, p_initial, p_transition, p_emission)

        # Round trip through an in-memory handle.
        handle = StringIO()
        MarkovModel.save(saved_model, handle)
        handle.seek(0)
        loaded_model = MarkovModel.load(handle)

        self.assertEqual(''.join(loaded_model.states), states)
        self.assertEqual(''.join(loaded_model.alphabet), alphabet)
        initial_ok = array_equal(loaded_model.p_initial, p_initial)
        self.assertTrue(initial_ok)
        transition_ok = array_equal(loaded_model.p_transition, p_transition)
        self.assertTrue(transition_ok)
        emission_ok = array_equal(loaded_model.p_emission, p_emission)
        self.assertTrue(emission_ok)
Exemple #18
0
    def test_phenotype_IO(self):
        """Test basic functionalities of phenotype IO methods."""
        p1 = phenotype.read(SMALL_JSON_PLATE, "pm-json")
        p2 = next(phenotype.parse(SMALL_CSV_PLATES, "pm-csv"))

        handle = StringIO()

        c = phenotype.write([p1, p2], handle, "pm-json")
        self.assertEqual(c, 2)

        handle.flush()
        handle.seek(0)
        # Now ready to read back from the handle...
        try:
            records = list(phenotype.parse(handle, "pm-json"))
        except ValueError as e:
            # This is BAD.  We can't read our own output.
            # I want to see the output when called from the test harness,
            # run_tests.py (which can be funny about new lines on Windows)
            #
            # ``records`` is never bound when parse() raises, so it must not
            # be formatted here; doing so raised UnboundLocalError and hid
            # the real parsing error.
            handle.seek(0)
            raise ValueError("%s\n\n%s" %
                             (str(e), repr(handle.read())))

        self.assertEqual(p1, records[0])

        handle.close()
        # Invalid format arguments must be rejected.
        handle = StringIO()
        self.assertRaises(TypeError, phenotype.write, p1, handle, 1)
        self.assertRaises(ValueError, phenotype.write, p1, handle, "PM-JSON")
        self.assertRaises(ValueError, phenotype.write, p1, handle, "pm-csv")
        handle.close()
Exemple #19
0
 def loop(self, filename, format):
     """Load a file into BioSQL, read it back, and round-trip it through GenBank.

     filename - path of the sequence file to parse.
     format - SeqIO format name used to parse ``filename``.
     """
     original_records = list(SeqIO.parse(filename, format))
     # now open a connection to load the database
     server = BioSeqDatabase.open_database(driver=DBDRIVER, user=DBUSER, passwd=DBPASSWD, host=DBHOST, db=TESTDB)
     # Close the connection even when one of the checks below fails;
     # previously a failing assertion skipped server.close().
     try:
         db_name = "test_loop_%s" % filename  # new namespace!
         db = server.new_database(db_name)
         count = db.load(original_records)
         self.assertEqual(count, len(original_records))
         server.commit()
         # Now read them back...
         biosql_records = [db.lookup(name=rec.name) for rec in original_records]
         # And check they agree
         self.assertTrue(compare_records(original_records, biosql_records))
         # Now write to a handle...
         handle = StringIO()
         SeqIO.write(biosql_records, handle, "gb")
         # Now read them back...
         handle.seek(0)
         new_records = list(SeqIO.parse(handle, "gb"))
         # And check they still agree
         self.assertEqual(len(new_records), len(original_records))
         for old, new in zip(original_records, new_records):
             # TODO - remove this hack because we don't yet write these (yet):
             for key in ["comment", "references", "db_source"]:
                 if key in old.annotations and key not in new.annotations:
                     del old.annotations[key]
             self.assertTrue(compare_record(old, new))
         # Done
         handle.close()
     finally:
         server.close()
 def test_write_read(self):
     """Write three alignments and compare the lengths of those read back."""
     from_first = list(ClustalIterator(StringIO(aln_example1)))
     from_second = list(ClustalIterator(StringIO(aln_example2)))
     alignments = from_first + from_second * 2
     handle = StringIO()
     written = ClustalWriter(handle).write_file(alignments)
     self.assertEqual(3, written)
     handle.seek(0)
     for index, parsed in enumerate(ClustalIterator(handle)):
         expected = alignments[index]
         self.assertEqual(parsed.get_alignment_length(), expected.get_alignment_length())
 def test_write_read(self):
     """Checking that written alignments read back with matching lengths."""
     alignments = list(ClustalIterator(StringIO(aln_example1)))
     # The alignments parsed from the second example are included twice.
     alignments = alignments + list(ClustalIterator(StringIO(aln_example2))) * 2
     handle = StringIO()
     writer = ClustalWriter(handle)
     self.assertEqual(3, writer.write_file(alignments))
     handle.seek(0)
     position = 0
     for read_back in ClustalIterator(handle):
         self.assertEqual(read_back.get_alignment_length(),
                          alignments[position].get_alignment_length())
         position += 1
Exemple #22
0
            def read_longer_than_maxsize():
                """Read DS830848.gb with its LOCUS length replaced by sys.maxsize + 1."""
                source = path.join("GenBank", "DS830848.gb")
                with open(source, 'r') as inhandle:
                    data2 = inhandle.readlines()
                # Replace the LOCUS line with one declaring an oversized length.
                oversized = str(sys.maxsize + 1)
                data2[0] = "LOCUS       AZZZAA02123456789 " + oversized + " bp    DNA     linear   PRI 15-OCT-2018\n"

                long_in_tmp = StringIO()
                long_in_tmp.writelines(data2)
                long_in_tmp.seek(0)
                SeqIO.read(long_in_tmp, 'genbank')
 def test_write_read_single(self):
     """Testing write/read when there is only one sequence."""
     first_alignment = next(ClustalIterator(StringIO(aln_example1)))
     # Now take just the first row as a new alignment:
     alignment = first_alignment[0:1]
     handle = StringIO()
     ClustalWriter(handle).write_file([alignment])
     handle.seek(0)
     for parsed in ClustalIterator(handle):
         self.assertEqual(parsed.get_alignment_length(), alignment.get_alignment_length())
         self.assertEqual(len(parsed), 1)
 def test_write_read_single(self):
     """Write a one-row alignment and check what is read back."""
     # Keep only the first row of the first parsed alignment.
     alignment = next(ClustalIterator(StringIO(aln_example1)))[0:1]
     handle = StringIO()
     writer = ClustalWriter(handle)
     writer.write_file([alignment])
     handle.seek(0)
     for read_back in ClustalIterator(handle):
         self.assertEqual(read_back.get_alignment_length(), alignment.get_alignment_length())
         self.assertEqual(len(read_back), 1)
    def _write_parse_and_compare(self, read1_records):
        """Write records as SeqXML, parse them back, and compare them pairwise."""
        buf = StringIO()
        SeqIO.write(read1_records, buf, "seqxml")
        buf.seek(0)
        read2_records = list(SeqIO.parse(buf, "seqxml"))

        # Same number of records, then record-by-record equality.
        self.assertEqual(len(read1_records), len(read2_records))
        for before, after in zip(read1_records, read2_records):
            assert_equal_records(self, before, after)
Exemple #26
0
 def test_convert(self):
     """Chain Phylo.convert through newick, nexus, phyloxml and back to newick."""
     # newick file -> nexus (text buffer)
     nexus_buf = StringIO()
     Phylo.convert(EX_NEWICK, 'newick', nexus_buf, 'nexus')
     nexus_buf.seek(0)
     # nexus -> phyloxml (bytes buffer)
     phyloxml_buf = BytesIO()
     Phylo.convert(nexus_buf, 'nexus', phyloxml_buf, 'phyloxml')
     phyloxml_buf.seek(0)
     # phyloxml -> newick (text buffer)
     newick_buf = StringIO()
     Phylo.convert(phyloxml_buf, 'phyloxml', newick_buf, 'newick')
     newick_buf.seek(0)
     tree = Phylo.read(newick_buf, 'newick')
     terminals = tree.get_terminals()
     self.assertEqual(len(terminals), 28)
Exemple #27
0
    def _write_parse_and_compare(self, read1_records):
        """Round-trip the given records through SeqXML and check they are unchanged."""
        handle = StringIO()
        SeqIO.write(read1_records, handle, "seqxml")

        handle.seek(0)
        read2_records = list(SeqIO.parse(handle, "seqxml"))

        expected_count = len(read1_records)
        self.assertEqual(expected_count, len(read2_records))

        for pair in zip(read1_records, read2_records):
            assert_equal_records(self, pair[0], pair[1])
Exemple #28
0
 def test_genbank_date_default(self):
     """Check a record without a date annotation reads back as "01-JAN-1980"."""
     # No "date" annotation is set, so the writer has to supply one.
     record = SeqRecord(Seq("ATGC", generic_dna),
                        id='123456789',
                        name='UnitTest',
                        description='Test case for date parsing')
     out = StringIO()
     SeqIO.write(record, out, 'genbank')
     out.seek(0)
     parsed = SeqIO.read(out, "gb")
     self.assertEqual(parsed.annotations["date"], "01-JAN-1980")
Exemple #29
0
 def test_genbank_date_correct(self):
     """Check a date given as a string survives a GenBank round trip."""
     record = SeqRecord(Seq("ATGC", generic_dna),
                        id='123456789',
                        name='UnitTest',
                        description='Test case for date parsing')
     record.annotations["date"] = "24-DEC-2015"
     out = StringIO()
     SeqIO.write(record, out, 'genbank')
     out.seek(0)
     parsed = SeqIO.read(out, "gb")
     self.assertEqual(parsed.annotations["date"], "24-DEC-2015")
Exemple #30
0
 def test_genbank_date_datetime(self):
     """Check a datetime date annotation reads back as a formatted date string."""
     record = SeqRecord(Seq("ATGC", generic_dna),
                        id='123456789',
                        name='UnitTest',
                        description='Test case for date parsing')
     record.annotations["date"] = datetime(2000, 2, 2)
     out = StringIO()
     SeqIO.write(record, out, 'genbank')
     out.seek(0)
     parsed = SeqIO.read(out, "gb")
     self.assertEqual(parsed.annotations["date"], "02-FEB-2000")
Exemple #31
0
 def test_convert(self):
     """Convert a tree newick -> nexus -> phyloxml -> newick and count its terminals."""
     as_nexus = StringIO()
     as_phyloxml = BytesIO()  # the phyloxml step uses a bytes buffer
     as_newick = StringIO()
     # Each conversion writes to a buffer that is rewound to feed the next.
     Phylo.convert(EX_NEWICK, 'newick', as_nexus, 'nexus')
     as_nexus.seek(0)
     Phylo.convert(as_nexus, 'nexus', as_phyloxml, 'phyloxml')
     as_phyloxml.seek(0)
     Phylo.convert(as_phyloxml, 'phyloxml', as_newick, 'newick')
     as_newick.seek(0)
     final_tree = Phylo.read(as_newick, 'newick')
     self.assertEqual(len(final_tree.get_terminals()), 28)
Exemple #32
0
 def test_newick_write(self):
     """Write a Newick tree with internal node labels and read it back."""
     # Tree with internal node labels
     source = StringIO("(A,B,(C,D)E)F;")
     tree = Phylo.read(source, "newick")
     mem_file = StringIO()
     Phylo.write(tree, mem_file, "newick")
     mem_file.seek(0)
     tree2 = Phylo.read(mem_file, "newick")
     # Sanity check
     self.assertEqual(tree2.count_terminals(), 4)
     # Check internal node labels were retained
     internal_names = set()
     for clade in tree2.get_nonterminals():
         # NOTE(review): this tests the clade object, not clade.name.
         if clade is not None:
             internal_names.add(clade.name)
     self.assertEqual(internal_names, set(("E", "F")))
    def check_rewrite(self, filename):
        """Read an EMBL record, write it out, and compare it with the re-read copy."""
        old = SeqIO.read(filename, "embl")

        #TODO - Check these properties:
        # (cleared, trimmed to one entry, or removed before the comparison)
        old.dbxrefs = []
        first_accession = old.annotations['accessions'][:1]
        old.annotations['accessions'] = first_accession
        del old.annotations['references']

        out = StringIO()
        written = SeqIO.write(old, out, "embl")
        self.assertEqual(1, written)
        out.seek(0)
        new = SeqIO.read(out, "embl")

        self.assertTrue(compare_record(old, new))
Exemple #34
0
    def test_TaggingConsumer(self):
        """Check the text TaggingConsumer writes for section and tagged-line calls."""
        h = StringIO()
        tc = ParserSupport.TaggingConsumer(handle=h, colwidth=5)

        def clear_output():
            """Empty the handle so the next check sees only fresh output."""
            h.seek(0)
            h.truncate(0)

        tc.start_section()
        self.assertEqual(h.getvalue(), "***** start_section\n")
        clear_output()

        tc.test1("myline")
        self.assertEqual(h.getvalue(), "test1: myline\n")
        clear_output()

        tc.end_section()
        self.assertEqual(h.getvalue(), "***** end_section\n")
 def test_illumina_to_sanger(self):
     """Mapping check for FASTQ Illumina (0 to 62) to Sanger (0 to 62)."""
     seq = "N"*63
     # Quality characters are built with an ASCII offset of 64.
     qual = "".join(chr(64+q) for q in range(0,63))
     # Must be a real list: on Python 3 a range object never compares
     # equal to a list, so assertEqual would fail.
     expected_phred = list(range(63))
     in_handle = StringIO("@Test\n%s\n+\n%s" % (seq,qual))
     out_handle = StringIO()
     SeqIO.write(SeqIO.parse(in_handle, "fastq-illumina"),
                 out_handle, "fastq-sanger")
     out_handle.seek(0)
     record = SeqIO.read(out_handle, "fastq-sanger")
     self.assertEqual(str(record.seq), seq)
     self.assertEqual(record.letter_annotations["phred_quality"],
                      expected_phred)
Exemple #36
0
    def check_rewrite(self, filename):
        """Check an EMBL record is unchanged after being written and re-read."""
        old = SeqIO.read(filename, "embl")

        # TODO - Check these properties:
        annotations = old.annotations
        old.dbxrefs = []
        annotations['accessions'] = annotations['accessions'][:1]
        del annotations['references']

        # Write to memory and read straight back.
        stream = StringIO()
        self.assertEqual(1, SeqIO.write(old, stream, "embl"))
        stream.seek(0)
        new = SeqIO.read(stream, "embl")

        records_match = compare_record(old, new)
        self.assertTrue(records_match)
Exemple #37
0
 def test_newick_write(self):
     """Check internal node labels survive writing a tree as Newick."""
     # Tree with internal node labels
     tree = Phylo.read(StringIO('(A,B,(C,D)E)F;'), 'newick')
     mem_file = StringIO()
     Phylo.write(tree, mem_file, 'newick')
     mem_file.seek(0)
     tree2 = Phylo.read(mem_file, 'newick')
     # Sanity check
     terminal_count = tree2.count_terminals()
     self.assertEqual(terminal_count, 4)
     # Check internal node labels were retained
     nonterminals = [c for c in tree2.get_nonterminals() if c is not None]
     internal_names = set(c.name for c in nonterminals)
     self.assertEqual(internal_names, set(('E', 'F')))
Exemple #38
0
 def test_illumina_to_sanger(self):
     """Mapping check for FASTQ Illumina (0 to 62) to Sanger (0 to 62).

     Builds a single 63-base read whose quality string encodes the
     scores 0..62 with an ASCII offset of 64, converts it from
     "fastq-illumina" to "fastq-sanger" in memory, and checks that the
     sequence and the PHRED scores read back are unchanged.
     """
     seq = "N" * 63
     qual = "".join(chr(64 + q) for q in range(0, 63))
     # Materialise as a list: on Python 3 a bare range object never
     # compares equal to a list, which would make the final assertion
     # fail regardless of the conversion result.
     expected_phred = list(range(63))
     in_handle = StringIO("@Test\n%s\n+\n%s" % (seq, qual))
     out_handle = StringIO()
     SeqIO.write(SeqIO.parse(in_handle, "fastq-illumina"), out_handle,
                 "fastq-sanger")
     out_handle.seek(0)
     record = SeqIO.read(out_handle, "fastq-sanger")
     self.assertEqual(str(record.seq), seq)
     self.assertEqual(record.letter_annotations["phred_quality"],
                      expected_phred)
Exemple #39
0
 def test_long_names(self):
     """Round-trip GenBank records whose names crowd the LOCUS line.

     A template record is read from GenBank/iro.gb; for every
     (name, length) case a copy is given that name and an all-"N"
     sequence of that length, written as "gb", and both the first output
     line and the re-parsed record are checked for the name and length.
     """
     template = SeqIO.read("GenBank/iro.gb", "gb")
     self.assertEqual(len(template), 1326)

     # Each case is (name, sequence length, expected to be writable).
     # Acceptability of LOCUS line with length > 80 invalidates some of
     # these tests, which is why every case is currently flagged True.
     cases = (
         ("short", 1, True),
         ("max_length_of_16", 1000, True),
         ("overly_long_at_17", 1000, True),
         ("excessively_long_at_22", 99999, True),
         ("excessively_long_at_22", 100000, True),
         ("pushing_the_limits_at_24", 999, True),
         ("pushing_the_limits_at_24", 1000, True),
         ("old_max_name_length_was_26", 10, True),  # 2 digits
         ("old_max_name_length_was_26", 9, True),  # 1 digit
     )
     for name, seq_len, ok in cases:
         # Make the length match the desired target
         record = template[:]
         # TODO - Implement Seq * int
         record.seq = Seq("N" * seq_len, template.seq.alphabet)
         # Set the identifier to the desired name
         record.id = name
         record.name = name

         # Attempt to output the record...
         if not ok:
             # e.g. ValueError: Locus identifier 'excessively_long_at_22' is too long
             self.assertRaises(ValueError, record.format, "gb")
             continue

         handle = StringIO()
         with warnings.catch_warnings():
             # e.g. BiopythonWarning: Stealing space from length field to
             # allow long name in LOCUS line
             warnings.simplefilter("ignore", BiopythonWarning)
             written = SeqIO.write(record, handle, "gb")
             self.assertEqual(1, written)

         handle.seek(0)
         locus_line = handle.readline()
         self.assertIn(" %s " % name, locus_line)
         self.assertIn(" %i bp " % seq_len, locus_line)
         # Splitting based on whitespace rather than position due to
         # updated GenBank specification
         fields = locus_line.split()
         self.assertEqual(fields[1:3], [name, str(seq_len)], locus_line)

         handle.seek(0)
         with warnings.catch_warnings():
             # e.g. BiopythonParserWarning: GenBank LOCUS line
             # identifier over 16 characters
             warnings.simplefilter("ignore", BiopythonWarning)
             reparsed = SeqIO.read(handle, "gb")
         self.assertEqual(name, reparsed.name)
         self.assertEqual(seq_len, len(reparsed))
    def test_write_read(self):
        """Round-trip the alignments in SIMPLE_XMFA through MauveWriter.

        The file is parsed with MauveIterator, written to an in-memory
        handle, parsed again, and the paired alignments are compared by
        row count, record id and sequence text.
        """
        # Context manager so the file is closed even if parsing raises.
        with open(self.SIMPLE_XMFA, 'r') as source:
            aln_list = list(MauveIterator(source))

        handle = StringIO()
        MauveWriter(handle).write_file(aln_list)
        handle.seek(0)
        aln_list_out = list(MauveIterator(handle))

        # NOTE(review): zip() stops at the shorter list, so an alignment
        # dropped during the round trip would go unnoticed here - consider
        # also asserting len(aln_list) == len(aln_list_out).
        for a1, a2 in zip(aln_list, aln_list_out):
            self.assertEqual(len(a1), len(a2))
            for r1, r2 in zip(a1, a2):
                self.assertEqual(r1.id, r2.id)
                self.assertEqual(str(r1.seq), str(r2.seq))
    def test_write_read(self):
        """Round-trip the alignments in SIMPLE_XMFA through MauveWriter.

        The file is parsed with MauveIterator, written to an in-memory
        handle, parsed again, and the paired alignments are compared by
        row count, record id and sequence text.
        """
        # Context manager so the file is closed even if parsing raises.
        with open(self.SIMPLE_XMFA, 'r') as source:
            aln_list = list(MauveIterator(source))

        handle = StringIO()
        MauveWriter(handle).write_file(aln_list)
        handle.seek(0)
        aln_list_out = list(MauveIterator(handle))

        # NOTE(review): zip() stops at the shorter list, so an alignment
        # dropped during the round trip would go unnoticed here - consider
        # also asserting len(aln_list) == len(aln_list_out).
        for a1, a2 in zip(aln_list, aln_list_out):
            self.assertEqual(len(a1), len(a2))
            for r1, r2 in zip(a1, a2):
                self.assertEqual(r1.id, r2.id)
                self.assertEqual(str(r1.seq), str(r2.seq))
    def test_write_read(self):
        """Round-trip the alignments in phylip_text5a through PhylipWriter.

        The text is parsed with PhylipIterator, written to an in-memory
        handle, parsed again, and the two lists are compared by alignment
        count, row count, record id and sequence text.
        """
        source = StringIO(phylip_text5a)
        parsed = list(PhylipIterator(source))
        source.close()

        buf = StringIO()
        writer = PhylipWriter(buf)
        writer.write_file(parsed)
        buf.seek(0)
        reparsed = list(PhylipIterator(buf))

        self.assertEqual(len(parsed), len(reparsed))
        for before, after in zip(parsed, reparsed):
            self.assertEqual(len(before), len(after))
            for rec_before, rec_after in zip(before, after):
                self.assertEqual(rec_before.id, rec_after.id)
                self.assertEqual(str(rec_before.seq), str(rec_after.seq))
    def test_write_read(self):
        """Round-trip the alignments in phylip_text5a through PhylipWriter.

        The text is parsed with PhylipIterator, written to an in-memory
        handle, parsed again, and the two lists are compared by alignment
        count, row count, record id and sequence text.
        """
        source = StringIO(phylip_text5a)
        parsed = list(PhylipIterator(source))
        source.close()

        buf = StringIO()
        writer = PhylipWriter(buf)
        writer.write_file(parsed)
        buf.seek(0)
        reparsed = list(PhylipIterator(buf))

        self.assertEqual(len(parsed), len(reparsed))
        for before, after in zip(parsed, reparsed):
            self.assertEqual(len(before), len(after))
            for rec_before, rec_after in zip(before, after):
                self.assertEqual(rec_before.id, rec_after.id)
                self.assertEqual(str(rec_before.seq), str(rec_after.seq))
Exemple #44
0
 def check(self, sff_name, sff_format, out_name, format):
     """Convert an SFF file and compare the result with a reference file.

     ``sff_name`` (read as ``sff_format``) is converted to ``format`` in
     memory with SeqIO.convert; the converted records are then compared
     one by one with those parsed from ``out_name`` in the same format.

     Sequences are compared unless the format is "qual", and quality
     scores are compared unless the format is "fasta".
     """
     wanted = list(SeqIO.parse(out_name, format))
     data = StringIO()
     count = SeqIO.convert(sff_name, sff_format, data, format)
     self.assertEqual(count, len(wanted))
     data.seek(0)
     converted = list(SeqIO.parse(data, format))
     self.assertEqual(len(wanted), len(converted))
     for old, new in zip(wanted, converted):
         self.assertEqual(old.id, new.id)
         self.assertEqual(old.name, new.name)
         if format != "qual":
             self.assertEqual(str(old.seq), str(new.seq))
         # Independent check, not "elif": as an elif this branch was only
         # reached for "qual", so formats carrying both sequence and
         # quality never had their scores compared.
         if format != "fasta":
             self.assertEqual(old.letter_annotations["phred_quality"],
                              new.letter_annotations["phred_quality"])
 def check(self, sff_name, sff_format, out_name, format):
     """Convert an SFF file and compare the result with a reference file.

     ``sff_name`` (read as ``sff_format``) is converted to ``format`` in
     memory with SeqIO.convert; the converted records are then compared
     one by one with those parsed from ``out_name`` in the same format.

     Sequences are compared unless the format is "qual", and quality
     scores are compared unless the format is "fasta".
     """
     wanted = list(SeqIO.parse(out_name, format))
     data = StringIO()
     count = SeqIO.convert(sff_name, sff_format, data, format)
     self.assertEqual(count, len(wanted))
     data.seek(0)
     converted = list(SeqIO.parse(data, format))
     self.assertEqual(len(wanted), len(converted))
     for old, new in zip(wanted, converted):
         self.assertEqual(old.id, new.id)
         self.assertEqual(old.name, new.name)
         if format != "qual":
             self.assertEqual(str(old.seq), str(new.seq))
         # Independent check, not "elif": as an elif this branch was only
         # reached for "qual", so formats carrying both sequence and
         # quality never had their scores compared.
         if format != "fasta":
             self.assertEqual(old.letter_annotations["phred_quality"],
                              new.letter_annotations["phred_quality"])
Exemple #46
0
 def test_newick_write(self):
     """Write a Newick tree with labelled internal nodes and re-read it.

     The tree ``(A,B,(C,D)E)F;`` is parsed, written to an in-memory
     handle as Newick and parsed again; the re-read tree must still have
     four terminals and the internal labels E and F.
     """
     # Tree with internal node labels
     newick_text = "(A,B,(C,D)E)F;"
     mem_file = StringIO()
     tree = Phylo.read(StringIO(newick_text), "newick")
     Phylo.write(tree, mem_file, "newick")

     mem_file.seek(0)
     tree2 = Phylo.read(mem_file, "newick")

     # Sanity check
     terminal_count = tree2.count_terminals()
     self.assertEqual(terminal_count, 4)

     # Check internal node labels were retained
     internal_names = set()
     for clade in tree2.get_nonterminals():
         # The guard is on the clade object itself, not on its name.
         if clade is not None:
             internal_names.add(clade.name)
     self.assertEqual(internal_names, {"E", "F"})
 def test_sanger_to_illumina(self):
     """Mapping check for FASTQ Sanger (0 to 93) to Illumina (0 to 62).

     A 94-base read encoding the scores 0..93 with an ASCII offset of 33
     is converted from "fastq-sanger" to "fastq-illumina"; the scores
     read back are expected to be capped at 62.  At most one
     BiopythonWarning may be raised by the conversion.
     """
     scores = range(0, 94)
     seq = "N"*94
     qual = "".join([chr(33+q) for q in scores])
     expected_phred = [min(62, q) for q in scores]

     fastq_text = "@Test\n%s\n+\n%s" % (seq, qual)
     in_handle = StringIO(fastq_text)
     out_handle = StringIO()
     with warnings.catch_warnings(record=True) as caught:
         warnings.simplefilter("always", BiopythonWarning)
         records = SeqIO.parse(in_handle, "fastq-sanger")
         SeqIO.write(records, out_handle, "fastq-illumina")
         self.assertTrue(len(caught) <= 1, caught)

     out_handle.seek(0)
     record = SeqIO.read(out_handle, "fastq-illumina")
     self.assertEqual(str(record.seq), seq)
     observed_phred = record.letter_annotations["phred_quality"]
     self.assertEqual(observed_phred, expected_phred)
Exemple #48
0
 def test_sanger_to_illumina(self):
     """Mapping check for FASTQ Sanger (0 to 93) to Illumina (0 to 62).

     A 94-base read encoding the scores 0..93 with an ASCII offset of 33
     is converted from "fastq-sanger" to "fastq-illumina"; the scores
     read back are expected to be capped at 62.  At most one
     BiopythonWarning may be raised by the conversion.
     """
     scores = range(0, 94)
     seq = "N" * 94
     qual = "".join([chr(33 + q) for q in scores])
     expected_phred = [min(62, q) for q in scores]

     fastq_text = "@Test\n%s\n+\n%s" % (seq, qual)
     in_handle = StringIO(fastq_text)
     out_handle = StringIO()
     with warnings.catch_warnings(record=True) as caught:
         warnings.simplefilter("always", BiopythonWarning)
         records = SeqIO.parse(in_handle, "fastq-sanger")
         SeqIO.write(records, out_handle, "fastq-illumina")
         self.assertTrue(len(caught) <= 1, caught)

     out_handle.seek(0)
     record = SeqIO.read(out_handle, "fastq-illumina")
     self.assertEqual(str(record.seq), seq)
     observed_phred = record.letter_annotations["phred_quality"]
     self.assertEqual(observed_phred, expected_phred)
Exemple #49
0
 def test_sanger_to_illumina(self):
     """Mapping check for FASTQ Sanger (0 to 93) to Illumina (0 to 62).

     A 94-base read encoding the scores 0..93 with an ASCII offset of 33
     is converted from "fastq-sanger" to "fastq-illumina"; the scores
     read back are expected to be capped at 62.
     """
     seq = "N" * 94
     qual = "".join(chr(33 + q) for q in range(0, 94))
     expected_phred = [min(62, q) for q in range(0, 94)]
     in_handle = StringIO("@Test\n%s\n+\n%s" % (seq, qual))
     out_handle = StringIO()
     # Want to ignore the data loss warning.
     # catch_warnings() restores the previous filter list on exit, even
     # if the write raises.  The old simplefilter()/filters.pop() pair
     # inserted the filter at the front of the list but popped the last
     # entry, so it removed an unrelated filter and left "ignore" active.
     with warnings.catch_warnings():
         warnings.simplefilter('ignore', BiopythonWarning)
         SeqIO.write(SeqIO.parse(in_handle, "fastq-sanger"), out_handle,
                     "fastq-illumina")
     out_handle.seek(0)
     record = SeqIO.read(out_handle, "fastq-illumina")
     self.assertEqual(str(record.seq), seq)
     self.assertEqual(record.letter_annotations["phred_quality"],
                      expected_phred)
 def test_sanger_to_illumina(self):
     """Mapping check for FASTQ Sanger (0 to 93) to Illumina (0 to 62).

     A 94-base read encoding the scores 0..93 with an ASCII offset of 33
     is converted from "fastq-sanger" to "fastq-illumina"; the scores
     read back are expected to be capped at 62.
     """
     seq = "N"*94
     qual = "".join(chr(33+q) for q in range(0,94))
     expected_phred = [min(62,q) for q in range(0,94)]
     in_handle = StringIO("@Test\n%s\n+\n%s" % (seq,qual))
     out_handle = StringIO()
     # Want to ignore the data loss warning.
     # catch_warnings() restores the previous filter list on exit, even
     # if the write raises.  The old simplefilter()/filters.pop() pair
     # inserted the filter at the front of the list but popped the last
     # entry, so it removed an unrelated filter and left "ignore" active.
     with warnings.catch_warnings():
         warnings.simplefilter('ignore', BiopythonWarning)
         SeqIO.write(SeqIO.parse(in_handle, "fastq-sanger"),
                     out_handle, "fastq-illumina")
     out_handle.seek(0)
     record = SeqIO.read(out_handle, "fastq-illumina")
     self.assertEqual(str(record.seq), seq)
     self.assertEqual(record.letter_annotations["phred_quality"],
                      expected_phred)
 def test_solexa_to_sanger(self):
     """Mapping check for FASTQ Solexa (-5 to 62) to Sanger (0 to 62).

     A 68-base read encoding the Solexa scores -5..62 with an ASCII
     offset of 64 is converted from "fastq-solexa" to "fastq-sanger"; the
     PHRED scores read back must equal the rounded result of
     QualityIO.phred_quality_from_solexa for each input score.
     """
     # The point of this test is the writing code doesn't actually use the
     # solexa_quality_from_phred function directly. For speed it uses a
     # cached dictionary of the mappings.
     solexa_scores = range(-5, 63)
     seq = "N" * 68
     qual = "".join([chr(64 + q) for q in solexa_scores])
     expected_phred = []
     for q in solexa_scores:
         expected_phred.append(round(QualityIO.phred_quality_from_solexa(q)))

     fastq_text = "@Test\n%s\n+\n%s" % (seq, qual)
     in_handle = StringIO(fastq_text)
     out_handle = StringIO()
     records = SeqIO.parse(in_handle, "fastq-solexa")
     SeqIO.write(records, out_handle, "fastq-sanger")

     out_handle.seek(0)
     record = SeqIO.read(out_handle, "fastq-sanger")
     self.assertEqual(str(record.seq), seq)
     observed_phred = record.letter_annotations["phred_quality"]
     self.assertEqual(observed_phred, expected_phred)
 def test_solexa_to_sanger(self):
     """Mapping check for FASTQ Solexa (-5 to 62) to Sanger (0 to 62).

     A 68-base read encoding the Solexa scores -5..62 with an ASCII
     offset of 64 is converted from "fastq-solexa" to "fastq-sanger"; the
     PHRED scores read back must equal the rounded result of
     QualityIO.phred_quality_from_solexa for each input score.
     """
     # The point of this test is the writing code doesn't actually use the
     # solexa_quality_from_phred function directly. For speed it uses a
     # cached dictionary of the mappings.
     solexa_scores = range(-5, 63)
     seq = "N"*68
     qual = "".join([chr(64+q) for q in solexa_scores])
     expected_phred = []
     for q in solexa_scores:
         expected_phred.append(round(QualityIO.phred_quality_from_solexa(q)))

     fastq_text = "@Test\n%s\n+\n%s" % (seq, qual)
     in_handle = StringIO(fastq_text)
     out_handle = StringIO()
     records = SeqIO.parse(in_handle, "fastq-solexa")
     SeqIO.write(records, out_handle, "fastq-sanger")

     out_handle.seek(0)
     record = SeqIO.read(out_handle, "fastq-sanger")
     self.assertEqual(str(record.seq), seq)
     observed_phred = record.letter_annotations["phred_quality"]
     self.assertEqual(observed_phred, expected_phred)
Exemple #53
0
        def test_empty_file(self):
            """Exercise the Nexus reader on empty input and the writer's limits.

            Parsing an empty handle must yield no alignments; writing the
            single alignment ``a`` (taken from the enclosing scope) is
            printed for inspection; writing two alignments at once must
            raise ValueError.
            """
            print("Reading an empty file")
            parsed = list(NexusIterator(StringIO()))
            assert 0 == len(parsed)
            print("Done")
            print("")
            print("Writing...")

            handle = StringIO()
            writer = NexusWriter(handle)
            writer.write_file([a])
            handle.seek(0)
            print(handle.read())

            handle = StringIO()
            try:
                NexusWriter(handle).write_file([a, a])
            except ValueError:
                # Expected: more than one alignment is rejected.
                pass
            else:
                assert False, "Should have rejected more than one alignment!"
Exemple #54
0
    def test_multiple_output(self):
        """Check NexusWriter writes one alignment and rejects two.

        A three-record alignment is written to an in-memory handle and
        the output is checked for the expected first and last lines.
        Passing the same alignment twice in one call must raise
        ValueError.
        """
        records = [SeqRecord(Seq("ATGCTGCTGAT", alphabet=ambiguous_dna), id="foo"),
                   SeqRecord(Seq("ATGCTGCAGAT", alphabet=ambiguous_dna), id="bar"),
                   SeqRecord(Seq("ATGCTGCGGAT", alphabet=ambiguous_dna), id="baz")]
        a = MultipleSeqAlignment(records, alphabet=ambiguous_dna)

        handle = StringIO()
        NexusWriter(handle).write_file([a])
        handle.seek(0)
        data = handle.read()
        self.assertTrue(data.startswith("#NEXUS\nbegin data;\n"), data)
        self.assertTrue(data.endswith("end;\n"), data)

        handle = StringIO()
        writer = NexusWriter(handle)
        # Should reject more than one alignment.  assertRaises is used
        # instead of try / "assert False" / except, because a bare assert
        # is stripped under "python -O" and the check would then pass
        # even if no exception were raised.
        self.assertRaises(ValueError, writer.write_file, [a, a])