Example #1
0
def _dump_to_h5(stream: BytesIO, store: tables.File, file_size: int,
                date: datetime.date):
    """Stream chunks from ``stream`` and append their rows to ``store``.

    Chunks are pulled with ``_load_chunk`` until it returns ``None``. Each
    row yielded by ``_parse_chunk`` tagged ``b"4P"`` is stored in an
    extendable array under ``/price``; rows tagged ``b"VL"`` are stored
    under ``/volume``. One array is kept per ``(exchange, security)`` pair
    and rows carrying any other tag are ignored.

    Args:
        stream (BytesIO)     : input stream handed to ``_load_chunk``
        store (tables.File)  : pytable output
        file_size (int)      : size of the file, used as the progress total
        date (date)          : date of the file
    """
    # Arrays already created in ``store``, keyed by (exchange, security).
    price_arrays = {}
    volume_arrays = {}

    # Midnight of ``date`` as a POSIX timestamp. The datetime is naive, so
    # ``timestamp()`` interprets it in the local timezone.
    midnight = datetime.datetime.fromordinal(date.toordinal())
    day_start_ts = midnight.timestamp()

    with tqdm(total=file_size, desc="Streaming", unit="B", unit_scale=1,
              ncols=100) as progress:
        while True:
            chunk = _load_chunk(stream)
            if chunk is None:
                break

            (payload, exchange, _session, _category, security,
             chunk_size) = chunk
            key = (exchange, security)

            for typ, row in _parse_chunk(payload, day_start_ts):
                # Pick the destination group and its cache of open arrays.
                if typ == b"4P":
                    group, arrays = "/price", price_arrays
                elif typ == b"VL":
                    group, arrays = "/volume", volume_arrays
                else:
                    continue

                if key in arrays:
                    arrays[key].append([list(row)])
                    continue

                # First row for this key: create the array seeded with it.
                arrays[key] = store.create_earray(
                    group,
                    _get_security_code(exchange, security),
                    obj=[list(row)],
                    createparents=True,
                )

            # Advance the bar once per chunk, after all its rows are written.
            progress.update(chunk_size)
Example #2
0
    def _create_column(self, h5: tb.File, colpath: str, atom: Optional[tb.Atom]=None, expectedrows: int=10000,
                       shape: Optional[tuple]=None, data: (list, tuple, np.ndarray)=None) -> tb.EArray:
        # create an EArray column and return the created node

        if data is None and shape is None:
            shape = (0,)

        if data is not None and not isinstance(data, np.ndarray) and isinstance(data[0], str):
            data = [x.encode('utf-8') for x in data]

        return h5.create_earray(
            os.path.dirname(colpath), os.path.basename(colpath), obj=data, createparents=True,
            atom=atom, shape=shape, expectedrows=expectedrows, filters=self._filters
        )