def check_blocks(self, old_file, new_file):
    """Check that two BGZF files are made of the same sequence of blocks.

    Each file is opened in binary mode and scanned with ``bgzf.BgzfBlocks``;
    the two resulting lists must have the same length and compare equal.
    """
    # Context managers close each handle even if the block scan raises.
    with open(old_file, "rb") as h:
        old = list(bgzf.BgzfBlocks(h))
    with open(new_file, "rb") as h:
        new = list(bgzf.BgzfBlocks(h))
    # Compare the lengths first so a count mismatch is reported on its own.
    self.assertEqual(len(old), len(new))
    self.assertEqual(old, new)
Exemple #2
0
    def check_blocks(self, old_file, new_file):
        """Assert that both BGZF files yield identical block listings.

        The files are scanned in order (old first, then new) with
        ``bgzf.BgzfBlocks`` and the collected lists are compared.
        """
        listings = []
        for path in (old_file, new_file):
            with open(path, "rb") as handle:
                listings.append(list(bgzf.BgzfBlocks(handle)))
        expected, found = listings

        # Length is checked separately before the element-wise comparison.
        self.assertEqual(len(expected), len(found))
        self.assertEqual(expected, found)
Exemple #3
0
 def populate_blocks():
     """Scan the bgzipped FASTA file and return its block table.

     Each entry is a tuple of (uncompressed data start, uncompressed data
     end, compressed block start), built from the records yielded by
     ``bgzf.BgzfBlocks``.  The block count and the time elapsed since
     ``start_time`` are reported through ``verbose_print``.
     """
     with open(self.bgzipped_fasta_filename, 'rb') as fasta_file_for_blocks:
         blocks = []
         for record in bgzf.BgzfBlocks(fasta_file_for_blocks):
             data_start = record[2]
             data_end = data_start + record[3]
             blocks.append((data_start, data_end, record[0]))
         block_count = len(blocks)
         elapsed = datetime.datetime.now() - start_time
         verbose_print('\t\tFound {} blocks in {}'.format(block_count, elapsed))
     return blocks
    def check_random(self, filename):
        """Check BGZF random access by reading blocks in forward & reverse order.

        The file is first decompressed in one go with ``gzip`` to obtain the
        expected bytes, and its block table is collected with
        ``bgzf.BgzfBlocks``.  The data is then re-read through
        ``bgzf.BgzfReader`` block by block (forwards, then backwards), with
        non-sequential jumps between a late and an early block, and finally
        ``seek``/``tell`` are checked at offsets near every block boundary.

        Every handle is closed even if an assertion fails part way through.
        """
        # closing() only needs a .close() method, so it works for any handle
        # used here regardless of whether it supports the with statement.
        from contextlib import closing

        with closing(gzip.open(filename, "rb")) as h:
            old = h.read()

        with open(filename, "rb") as h:
            blocks = list(bgzf.BgzfBlocks(h))

        # Forward
        new = _empty_bytes_string
        with closing(bgzf.BgzfReader(filename, "rb")) as h:
            self.assertTrue(h.seekable())
            self.assertFalse(h.isatty())
            self.assertEqual(h.fileno(), h._handle.fileno())
            for start, raw_len, data_start, data_len in blocks:
                h.seek(bgzf.make_virtual_offset(start, 0))
                data = h.read(data_len)
                self.assertEqual(len(data), data_len)
                # The bytes gathered so far must end where this block starts.
                self.assertEqual(len(new), data_start)
                new += data
        self.assertEqual(len(old), len(new))
        self.assertEqual(old, new)

        # Reverse
        new = _empty_bytes_string
        with closing(bgzf.BgzfReader(filename, "rb")) as h:
            for start, raw_len, data_start, data_len in reversed(blocks):
                h.seek(bgzf.make_virtual_offset(start, 0))
                data = h.read(data_len)
                self.assertEqual(len(data), data_len)
                # Prepend, since the blocks are visited last to first.
                new = data + new
        self.assertEqual(len(old), len(new))
        self.assertEqual(old, new)

        # Jump back - non-sequential seeking
        if len(blocks) >= 3:
            with closing(bgzf.BgzfReader(filename, "rb", max_cache=1)) as h:
                # Seek to a late block in the file,
                # half way into the third last block
                start, raw_len, data_start, data_len = blocks[-3]
                voffset = bgzf.make_virtual_offset(start, data_len // 2)
                h.seek(voffset)
                self.assertEqual(voffset, h.tell())
                data = h.read(1000)
                self.assertTrue(data in old)
                self.assertEqual(old.find(data), data_start + data_len // 2)
                # Now seek to an early block in the file,
                # half way into the second block
                start, raw_len, data_start, data_len = blocks[1]
                voffset = bgzf.make_virtual_offset(start, data_len // 2)
                h.seek(voffset)
                self.assertEqual(voffset, h.tell())
                # Now read all rest of this block and start of next block
                data = h.read(data_len + 1000)
                self.assertTrue(data in old)
                self.assertEqual(old.find(data), data_start + data_len // 2)

        # Check seek/tell at block boundaries
        v_offsets = []
        for start, raw_len, data_start, data_len in blocks:
            for within_offset in [0, 1, data_len // 2, data_len - 1]:
                # Skip candidates that do not fall inside this block
                # (this happens for empty or very short blocks).
                if within_offset < 0 or data_len <= within_offset:
                    continue
                voffset = bgzf.make_virtual_offset(start, within_offset)
                real_offset = data_start + within_offset
                v_offsets.append((voffset, real_offset))
        # Visit the offsets in random order.
        shuffle(v_offsets)
        with closing(bgzf.BgzfReader(filename, "rb", max_cache=1)) as h:
            for voffset, real_offset in v_offsets:
                h.seek(0)
                # A unittest assertion, unlike a bare assert statement,
                # still runs when Python is started with -O.
                self.assertTrue(voffset >= 0 and real_offset >= 0)
                self.assertEqual(h.read(real_offset), old[:real_offset])
                self.assertEqual(h.tell(), voffset)
            for voffset, real_offset in v_offsets:
                h.seek(voffset)
                self.assertEqual(h.tell(), voffset)
Exemple #5
0
 def test_BgzfBlocks_TypeError(self):
     """Check get expected TypeError from BgzfBlocks.

     ``bgzf.BgzfBlocks`` is given the handle returned by ``bgzf.open``
     instead of a file opened with the built-in ``open``, once for each
     of the modes "r" and "rb", and must raise ``TypeError`` both times.
     """
     for mode in ("r", "rb"):
         # The with block closes the reader so no handle is left open
         # between iterations or after the test finishes.
         with bgzf.open("GenBank/cor6_6.gb.bgz", mode) as decompressed:
             with self.assertRaises(TypeError):
                 list(bgzf.BgzfBlocks(decompressed))