def load_blocks(self, location, blocks, ranges, query):
    """
    Open the compressed cdx data at ``location`` and return a lazy
    iterator over its decompressed lines.

    For every entry of ``ranges`` one chunk is read from the loaded
    reader (the entry is passed to ``reader.read``), decompressed with
    a fresh decompressor and split into lines. The combined line stream
    is handed to ``linearsearch`` together with ``query.key`` (the start
    bound) and is then cut off at the first line that is not less than
    ``query.end_key`` (the end bound).

    The reader is closed in the ``finally`` clause of the block
    generator, i.e. once all ranges have been consumed or when that
    generator is finalized.
    """
    root_level = logging.getLogger().getEffectiveLevel()
    if root_level <= logging.DEBUG:
        template = 'Loading {b.count} blocks from {loc}:{b.offset}+{b.length}'
        logging.debug(template.format(b=blocks, loc=location))

    source = self.blk_loader.load(location, blocks.offset, blocks.length)

    def lines_in(range_):
        # A new decompressor is created for every block before it is read.
        data = gzip_decompressor().decompress(source.read(range_))
        for line in BytesIO(data):
            yield line

    def all_blocks():
        # Yield one line generator per range; release the reader afterwards.
        try:
            for range_ in ranges:
                yield lines_in(range_)
        finally:
            source.close()

    def before_end(line):
        # end_key is looked up on the query for every line tested.
        return line < query.end_key

    # flatten the per-block generators into one stream of lines
    lines = itertools.chain.from_iterable(all_blocks())

    # apply the start bound
    lines = linearsearch(lines, query.key)

    # apply the end bound
    return itertools.takewhile(before_end, lines)
def load_blocks(self, location, blocks, ranges, query):
    """
    Load one or more blocks of compressed cdx lines, return
    a line iterator which decompresses and returns one line at a time,
    bounded by query.key and query.end_key

    :param location: passed to ``self.blk_loader.load`` to locate the data
    :param blocks: object providing ``offset``, ``length`` and ``count``
        (``count`` is only used for the debug log message)
    :param ranges: iterable whose items are passed one by one to
        ``reader.read`` to fetch a single compressed block
    :param query: object providing ``key`` (start bound) and ``end_key``
        (end bound, exclusive)
    :return: a lazy iterator of lines; blocks are read and decompressed
        only as the iterator is advanced

    The reader is closed once every range has been consumed, or when the
    underlying block generator is finalized.
    """
    if (logging.getLogger().getEffectiveLevel() <= logging.DEBUG):
        msg = 'Loading {b.count} blocks from {loc}:{b.offset}+{b.length}'
        logging.debug(msg.format(b=blocks, loc=location))

    reader = self.blk_loader.load(location, blocks.offset, blocks.length)

    def decompress_block(range_):
        # Each block gets its own decompressor instance.
        decomp = gzip_decompressor()
        buff = decomp.decompress(reader.read(range_))
        for line in BytesIO(buff):
            yield line

    def iter_blocks():
        # Yield one line generator per range and make sure the reader is
        # released afterwards instead of being leaked.
        try:
            for range_ in ranges:
                yield decompress_block(range_)
        finally:
            reader.close()

    # iterate over all blocks; chain.from_iterable keeps this lazy and,
    # unlike itertools.imap, is available on both Python 2 and Python 3
    iter_ = itertools.chain.from_iterable(iter_blocks())

    # start bound
    iter_ = linearsearch(iter_, query.key)

    # end bound (looked up once, not per line)
    end = query.end_key
    iter_ = itertools.takewhile(lambda line: line < end, iter_)
    return iter_
def load_blocks(self, location, blocks, ranges, query):
    """
    Load one or more blocks of compressed data and return an iterator
    over the decompressed lines, bounded by query.key and query.end_key

    :param location: passed to ``self.blk_loader.load`` to locate the data
    :param blocks: object providing ``offset``, ``length`` and ``count``
        (``count`` is only used for the debug log message)
    :param ranges: iterable whose items are passed one by one to
        ``reader.read`` to fetch a single compressed block
    :param query: object providing ``key`` (start bound) and ``end_key``
        (end bound, exclusive)
    :return: a lazy iterator of lines; each block is read and
        decompressed only when the iterator reaches it

    The reader is closed once every range has been consumed, or when the
    underlying block generator is finalized.
    """
    if (logging.getLogger().getEffectiveLevel() <= logging.DEBUG):
        msg = 'Loading {b.count} blocks from {loc}:{b.offset}+{b.length}'
        logging.debug(msg.format(b=blocks, loc=location))

    reader = self.blk_loader.load(location, blocks.offset, blocks.length)

    def decompress_block(range_):
        # Each block gets its own decompressor instance.
        decomp = gzip_decompressor()
        buff = decomp.decompress(reader.read(range_))
        # NOTE(review): readline_to_iter is defined elsewhere; presumably it
        # turns the buffer's readline() into a line iterator -- confirm.
        return readline_to_iter(BytesIO(buff))

    def iter_blocks():
        # decompress_block is a plain function, so calling it does the read
        # and decompression immediately. Calling it from inside this
        # generator defers that work until the block is actually needed,
        # and the finally clause releases the reader instead of leaking it.
        try:
            for range_ in ranges:
                yield decompress_block(range_)
        finally:
            reader.close()

    # iterate over all blocks; chain.from_iterable keeps this lazy and,
    # unlike itertools.imap, is available on both Python 2 and Python 3
    iter_ = itertools.chain.from_iterable(iter_blocks())

    # start bound
    iter_ = linearsearch(iter_, query.key)

    # end bound (looked up once, not per line)
    end = query.end_key
    iter_ = itertools.takewhile(lambda line: line < end, iter_)
    return iter_