def _dump_to_h5(stream: BytesIO, store: tables.File, file_size: int, date: datetime.date):
    """Stream chunks from ``stream`` and write their rows into ``store``.

    Chunks are read with ``_load_chunk`` until it returns ``None``. Each
    chunk's payload is parsed with ``_parse_chunk``; rows tagged ``b"4P"``
    are written under ``/price`` and rows tagged ``b"VL"`` under
    ``/volume``. Rows carrying any other tag are skipped. Within each group
    there is one extendable array per ``(exchange, security)`` pair, named
    by ``_get_security_code(exchange, security)``: it is created from the
    first row seen for that pair and appended to afterwards.

    Args:
        stream: input stream the chunks are loaded from
        store: PyTables file receiving the output
        file_size: size of the file, used as the progress bar total
        date: date of the file; the timestamp of 00:00 on that date is
            passed to ``_parse_chunk``
    """
    # Naive datetime: ``timestamp()`` interprets it in the local timezone.
    day_start = datetime.datetime.fromordinal(date.toordinal())
    date_offset_epoch = day_start.timestamp()

    # Arrays already created in this run, per group, keyed by
    # (exchange, security).
    price_nodes = {}
    volume_nodes = {}

    def _store_row(nodes, group, key, row):
        # Extend the existing array for this key, or create it from the row.
        if key in nodes:
            nodes[key].append([list(row)])
            return
        exchange, security = key
        nodes[key] = store.create_earray(
            group,
            _get_security_code(exchange, security),
            obj=[list(row)],
            createparents=True,
        )

    with tqdm(total=file_size, desc="Streaming", unit="B", unit_scale=1, ncols=100) as pbar:
        chunk = _load_chunk(stream)
        while chunk is not None:
            # The session and category fields of the chunk are not used here.
            payload, exchange, _session, _category, security, chunk_size = chunk
            key = (exchange, security)

            for typ, row in _parse_chunk(payload, date_offset_epoch):
                if typ == b"4P":
                    _store_row(price_nodes, "/price", key, row)
                elif typ == b"VL":
                    _store_row(volume_nodes, "/volume", key, row)

            # Advance the progress bar once per chunk consumed.
            pbar.update(chunk_size)
            chunk = _load_chunk(stream)
def _create_column(self, h5: tb.File, colpath: str, atom: Optional[tb.Atom]=None,
                   expectedrows: int=10000, shape: Optional[tuple]=None,
                   data: (list, tuple, np.ndarray)=None) -> tb.EArray:
    """Create an EArray column at ``colpath`` and return the created node.

    Args:
        h5: open PyTables file in which the column is created
        colpath: full node path; its directory part is the parent group
            (created if missing) and its base name is the array name
        atom: atom passed through to ``create_earray``
        expectedrows: expected row count passed through to ``create_earray``
        shape: array shape; defaults to ``(0,)`` when there is no initial
            data
        data: optional initial content. A non-ndarray sequence whose first
            element is a ``str`` is UTF-8 encoded element by element. An
            empty list/tuple is treated the same as ``None``.

    Returns:
        The node returned by ``h5.create_earray``.
    """
    if data is not None and not isinstance(data, np.ndarray):
        if len(data) == 0:
            # An empty sequence has no first element to inspect and no dtype
            # to infer from, so treat it as "no initial data" rather than
            # failing on data[0].
            data = None
        elif isinstance(data[0], str):
            data = [x.encode('utf-8') for x in data]

    if data is None and shape is None:
        shape = (0,)

    return h5.create_earray(
        os.path.dirname(colpath),
        os.path.basename(colpath),
        obj=data,
        createparents=True,
        atom=atom,
        shape=shape,
        expectedrows=expectedrows,
        filters=self._filters
    )