def test_dataset_buffer__write_value():
    filename = os.path.join(tempfile.gettempdir(), "store.h5")
    try:
        with h5py.File(filename, "w") as store:
            columns = ("1", "2", "3", "4")
            max_size = 2000
            dataset = DatasetBuffer(store, "data", max_size, np.float, columns)
            assert dataset._chunk_size == 1024
            for i in range(max_size):
                data = np.ones(4)
                dataset.write_value(data)
            # One full chunk has already been flushed; the rest is still buffered.
            assert dataset._buf_index == 2000 - dataset._chunk_size
            dataset.flush_data()
            assert dataset._buf_index == 0

        with h5py.File(filename, "r") as store:
            data = store["data"][:]
            assert len(data) == max_size
            assert [x for x in store["data"].attrs["columns"]] == list(columns)
            for i in range(max_size):
                for j in range(4):
                    assert data[i][j] == 1.0
            df = DatasetBuffer.to_dataframe(store["data"])
            assert isinstance(df, pd.DataFrame)
            assert len(df) == max_size
            assert df.iloc[0, 0] == 1.0
    finally:
        if os.path.exists(filename):
            os.remove(filename)
class ValueContainer:
    """Container for a sequence of instances of ValueStorageBase."""

    # These could potentially be reduced in bit lengths. Compression probably
    # makes that unnecessary.
    _TYPE_MAPPING = {
        float: np.float,
        int: np.int,
        complex: np.complex,
    }

    def __init__(self, value, hdf_store, path, max_size, dataset_property_type,
                 max_chunk_bytes=None, store_timestamp=False):
        group_name = os.path.dirname(path)
        basename = os.path.basename(path)
        try:
            if basename in hdf_store[group_name].keys():
                raise InvalidParameter(f"duplicate dataset name {basename}")
        except KeyError:
            # Don't bother checking each sub path.
            pass

        dtype = self._TYPE_MAPPING.get(value.value_type)
        assert dtype is not None
        scaleoffset = None
        if dtype == np.float:
            scaleoffset = 4
        elif dtype == np.int:
            scaleoffset = 0
        attributes = {"type": dataset_property_type.value}
        timestamp_path = None

        if store_timestamp:
            timestamp_path = self.timestamp_path(path)
            self._timestamps = DatasetBuffer(
                hdf_store,
                timestamp_path,
                max_size,
                np.float,
                ["Timestamp"],
                scaleoffset=scaleoffset,
                max_chunk_bytes=max_chunk_bytes,
                attributes={"type": DatasetPropertyType.TIMESTAMP.value},
            )
            attributes["timestamp_path"] = timestamp_path
        else:
            self._timestamps = None

        self._dataset = DatasetBuffer(
            hdf_store,
            path,
            max_size,
            dtype,
            value.make_columns(),
            scaleoffset=scaleoffset,
            max_chunk_bytes=max_chunk_bytes,
            attributes=attributes,
        )

    @staticmethod
    def timestamp_path(path):
        return path + "Timestamp"

    def append(self, value, timestamp=None):
        """Append a value to the container.

        Parameters
        ----------
        value : ValueStorageBase
        timestamp : float | None

        """
        self._dataset.write_value(value.value)
        if self._timestamps is not None:
            assert timestamp is not None
            self._timestamps.write_value(timestamp)

    def flush_data(self):
        """Flush any outstanding data to disk."""
        self._dataset.flush_data()
        if self._timestamps is not None:
            self._timestamps.flush_data()

    def max_num_bytes(self):
        """Return the maximum number of bytes the container could hold.

        Returns
        -------
        int

        """
        return self._dataset.max_num_bytes()
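# A minimal usage sketch for the class above (not part of the original module).
# Assumptions: ``FloatValue`` is a hypothetical stand-in for a ValueStorageBase
# subclass, and ``DatasetPropertyType.ELEMENT_PROPERTY`` is an assumed enum
# member; the real value classes and enum live elsewhere in the repo.
def _example_value_container_usage():
    class FloatValue:
        """Hypothetical value object exposing what ValueContainer reads."""
        value_type = float

        def __init__(self, name, value):
            self._name = name
            self.value = value

        def make_columns(self):
            return [self._name]

    with h5py.File("example.h5", "w") as hdf_store:
        value = FloatValue("Line.line1__Power", 0.0)
        container = ValueContainer(
            value,
            hdf_store,
            "Exports/Line.line1/Power",
            max_size=96,
            dataset_property_type=DatasetPropertyType.ELEMENT_PROPERTY,  # assumed member
            store_timestamp=True,
        )
        # Write one value per time step along with its timestamp, then flush
        # any buffered data to the HDF5 file.
        for time_step in range(96):
            value.value = float(time_step)
            container.append(value, timestamp=float(time_step))
        container.flush_data()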
class ValueContainer:
    """Container for a sequence of instances of ValueStorageBase."""

    def __init__(self, values, hdf_store, path, max_size, elem_names,
                 dataset_property_type, max_chunk_bytes=None,
                 store_time_step=False):
        group_name = os.path.dirname(path)
        basename = os.path.basename(path)
        try:
            if basename in hdf_store[group_name]:
                raise InvalidParameter(f"duplicate dataset name {basename}")
        except KeyError:
            # Don't bother checking each sub path.
            pass

        dtype = values[0].value_type
        scaleoffset = None
        # There is no np.float128 on Windows.
        if dtype in (float, np.float32, np.float64, np.longdouble):
            scaleoffset = 4
        time_step_path = None
        max_size = max_size * len(values) if store_time_step else max_size

        if store_time_step:
            # Store indices for time step and element.
            # Each row of this dataset corresponds to a row in the data.
            # This will be required to interpret the raw data.
            attributes = {"type": DatasetPropertyType.TIME_STEP.value}
            time_step_path = self.time_step_path(path)
            self._time_steps = DatasetBuffer(
                hdf_store,
                time_step_path,
                max_size,
                int,
                ["Time", "Name"],
                scaleoffset=0,
                max_chunk_bytes=max_chunk_bytes,
                attributes=attributes,
            )
            columns = []
            tmp_columns = values[0].make_columns()
            for column in tmp_columns:
                fields = column.split(ValueStorageBase.DELIMITER)
                fields[0] = "AllNames"
                columns.append(ValueStorageBase.DELIMITER.join(fields))
            column_ranges = [0, len(tmp_columns)]
        else:
            columns = []
            column_ranges = []
            col_index = 0
            for value in values:
                tmp_columns = value.make_columns()
                col_range = (col_index, len(tmp_columns))
                column_ranges.append(col_range)
                for column in tmp_columns:
                    columns.append(column)
                    col_index += 1
            self._time_steps = None

        attributes = {"type": dataset_property_type.value}
        if store_time_step:
            attributes["time_step_path"] = time_step_path

        self._dataset = DatasetBuffer(
            hdf_store,
            path,
            max_size,
            dtype,
            columns,
            scaleoffset=scaleoffset,
            max_chunk_bytes=max_chunk_bytes,
            attributes=attributes,
            names=elem_names,
            column_ranges_per_name=column_ranges,
        )

    @staticmethod
    def time_step_path(path):
        return path + "TimeStep"

    def append(self, values):
        """Append values for all elements to the container.

        Parameters
        ----------
        values : list
            list of ValueStorageBase

        """
        if isinstance(values[0].value, list):
            vals = [x for y in values for x in y.value]
        else:
            vals = [x.value for x in values]
        self._dataset.write_value(vals)

    def append_by_time_step(self, value, time_step, elem_index):
        """Append a value to the container.

        Parameters
        ----------
        value : ValueStorageBase
        time_step : int
        elem_index : int

        """
        if isinstance(value.value, list):
            vals = [x for x in value.value]
        else:
            vals = value.value
        self._dataset.write_value(vals)
        self._time_steps.write_value([time_step, elem_index])

    def flush_data(self):
        """Flush any outstanding data to disk."""
        self._dataset.flush_data()
        if self._time_steps is not None:
            self._time_steps.flush_data()

    def max_num_bytes(self):
        """Return the maximum number of bytes the container could hold.

        Returns
        -------
        int

        """
        return self._dataset.max_num_bytes()
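# A minimal usage sketch for the multi-element class above (not part of the
# original module). Assumptions: ``PowerValue`` is a hypothetical stand-in for
# a ValueStorageBase subclass, its column naming is illustrative only, and
# ``DatasetPropertyType.ELEMENT_PROPERTY`` is an assumed enum member.
def _example_multi_element_usage():
    class PowerValue:
        """Hypothetical value object exposing what ValueContainer reads."""
        value_type = float

        def __init__(self, name):
            self._name = name
            self.value = [0.0, 0.0]  # e.g., one value per phase

        def make_columns(self):
            # Illustrative column names; the real classes build these with
            # ValueStorageBase.DELIMITER.
            return [f"{self._name}__A", f"{self._name}__B"]

    with h5py.File("example.h5", "w") as hdf_store:
        names = ["Line.line1", "Line.line2"]
        values = [PowerValue(name) for name in names]
        container = ValueContainer(
            values,
            hdf_store,
            "Exports/Lines/Powers",
            max_size=96,
            elem_names=names,
            dataset_property_type=DatasetPropertyType.ELEMENT_PROPERTY,  # assumed member
        )
        # Append one row per time step containing the flattened values of all
        # elements, then flush the buffered data to the HDF5 file.
        for time_step in range(96):
            for value in values:
                value.value = [float(time_step), float(time_step) * 2]
            container.append(values)
        container.flush_data()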