class SourceFile(object):
    """Sequential reader that hands out a binary file piece by piece.

    The file is loaded into an in-memory buffer in large chunks, and the
    buffer is then drained in small pieces, either fixed-size byte blocks
    (``_next_block``) or single lines (``_next_row``). Every piece returned
    is tagged with an increasing serial number.

    The file is closed automatically when end of file is reached. Call
    ``close()`` or use the instance as a context manager to release the
    handle when reading stops early.

    NOTE(review): the buffer is an ``io.BytesIO`` in block mode and a
    ``Queue`` in row mode, so an instance should be driven in one mode only.
    """

    # Serial number that will be attached to the next piece returned.
    serial = 0
    # Staging buffer; None means "nothing loaded, refill on next read".
    buffer_ = None

    def __init__(self, source_file, bs=None):
        """Open *source_file* for binary reading.

        :param source_file: path of the file to read.
        :param bs: number of bytes requested from the buffer for each block;
            falls back to 64 when omitted (or otherwise falsy).
        """
        self.__file = open(source_file, 'rb')
        self.bs = bs or 64

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def close(self):
        """Close the underlying file and drop any buffered data.

        Safe to call more than once. Subsequent reads return ``None``.
        """
        self.__file.close()
        self.buffer_ = None

    def __file_content_to_buffer(self, type_='block', size=32):
        """Load the next chunk of the file into ``buffer_``.

        The meaning of *size* depends on *type_*: for ``'block'`` it is a
        number of mebibytes, for any other value (row mode) it is a number
        of thousands of lines. The file is closed as soon as end of file is
        detected.
        """
        if self.__file.closed:
            # Nothing more can be loaded; install an empty buffer instead of
            # touching the closed file (which would raise ValueError).
            self.buffer_ = io.BytesIO() if type_ == 'block' else Queue()
            return

        if type_ == 'block':
            file_content = self.__file.read(size * 1024 * 1024)
            if not file_content:
                # An empty read means end of file.
                self.__file.close()
            self.buffer_ = io.BytesIO(file_content)
            return

        # Row mode: queue up to ``size`` thousand lines.
        self.buffer_ = Queue()
        try:
            for _ in range(size * 1000):
                self.buffer_.put(next(self.__file))
        except StopIteration:
            self.__file.close()

    def __read_from_buffer(self, type_='block'):
        """Return the next piece from the buffer, refilling it as needed.

        Returns ``None`` once the file is closed and the buffer is drained,
        and keeps returning ``None`` on every later call.
        """
        while True:
            if self.buffer_ is None:
                if self.__file.closed:
                    # Already exhausted (or explicitly closed): do not try
                    # to read from the closed file again.
                    return None
                self.__file_content_to_buffer(type_)

            if type_ == 'block':
                data = self.buffer_.read(self.bs)
                if data:
                    return data
            else:
                try:
                    return self.buffer_.get_nowait()
                except Empty:
                    pass

            # The buffer is drained. If the file is still open there is more
            # data to load, so clear the buffer and go around once more.
            self.buffer_ = None
            if self.__file.closed:
                return None

    def _next(self, type_):
        """Return the next piece as ``{'block': data, 'serial': n}``.

        Returns ``None`` when there is no more data. The serial number only
        advances when a piece is actually returned.
        """
        data = self.__read_from_buffer(type_)
        if not data:
            return None
        res = {
            'block': data,
            'serial': self.serial,
        }
        self.serial += 1
        return res

    def _next_block(self):
        """Return the next block of at most ``bs`` bytes, or ``None``."""
        return self._next('block')

    def _next_row(self):
        """Return the next line of the file, or ``None``."""
        return self._next('row')