def InitializeDataStore(self, hdf_store, num_steps, MC_scenario_number=None):
    """Create the scenario's export datasets and per-element data stores.

    Sets up three scalar export buffers (Timestamp, Frequency, Mode) under
    ``Exports/<scenario>`` and then delegates to every element so it can
    create its own datasets. When a Monte Carlo scenario number is given,
    the scenario name is suffixed with ``_MC<number>`` first.
    """
    if MC_scenario_number is not None:
        self._scenario = self._base_scenario + f"_MC{MC_scenario_number}"
    self._hdf_store = hdf_store

    # (attribute to assign, dataset/column label, HDF5 dtype)
    buffer_specs = (
        ("_time_dataset", "Timestamp", float),
        ("_frequency_dataset", "Frequency", float),
        ("_mode_dataset", "Mode", "S10"),
    )
    for attr_name, label, data_type in buffer_specs:
        buffer = DatasetBuffer(
            hdf_store=hdf_store,
            path=f"Exports/{self._scenario}/{label}",
            max_size=num_steps,
            dtype=data_type,
            columns=(label,),
            max_chunk_bytes=self._max_chunk_bytes,
        )
        setattr(self, attr_name, buffer)

    for element in self._elements:
        element.initialize_data_store(hdf_store, self._scenario, num_steps)
def InitializeDataStore(self, hdf_store, num_steps, MC_scenario_number=None):
    """Create the scenario's export datasets and per-metric data stores.

    Sets up three scalar export buffers (Timestamp, Frequency, Mode) under
    ``Exports/<scenario>``, resets the step counter, and then delegates to
    every metric so it can create its own datasets. When a Monte Carlo
    scenario number is given, the scenario name is suffixed with
    ``_MC<number>`` first.
    """
    if MC_scenario_number is not None:
        self._scenario = self._base_scenario + f"_MC{MC_scenario_number}"
    self._hdf_store = hdf_store

    # (attribute to assign, dataset/column label, HDF5 dtype)
    buffer_specs = (
        ("_time_dataset", "Timestamp", float),
        ("_frequency_dataset", "Frequency", float),
        ("_mode_dataset", "Mode", "S10"),
    )
    for attr_name, label, data_type in buffer_specs:
        buffer = DatasetBuffer(
            hdf_store=hdf_store,
            path=f"Exports/{self._scenario}/{label}",
            max_size=num_steps,
            dtype=data_type,
            columns=(label,),
            max_chunk_bytes=self._max_chunk_bytes,
        )
        setattr(self, attr_name, buffer)

    self._cur_step = 0
    base_path = "Exports/" + self._scenario
    for metric in self._iter_metrics():
        metric.initialize_data_store(hdf_store, base_path, num_steps)
def __init__(self, value, hdf_store, path, max_size, dataset_property_type,
             max_chunk_bytes=None, store_timestamp=False):
    """Create the HDF5 dataset buffer backing a single exported value.

    Parameters
    ----------
    value : object
        Provides ``value_type`` (looked up in ``self._TYPE_MAPPING``) and
        ``make_columns()`` for the dataset's column names.
    hdf_store : h5py.File
        Open HDF5 store to create the dataset in.
    path : str
        Full dataset path inside the store.
    max_size : int
        Maximum number of rows the dataset will hold.
    dataset_property_type : enum
        Stored in the dataset's ``type`` attribute.
    max_chunk_bytes : int, optional
        Upper bound on HDF5 chunk size.
    store_timestamp : bool, optional
        When True, also create a parallel Timestamp dataset and record its
        path in the data dataset's attributes.

    Raises
    ------
    InvalidParameter
        If a dataset with the same name already exists in the group.
    """
    group_name = os.path.dirname(path)
    basename = os.path.basename(path)
    try:
        if basename in hdf_store[group_name].keys():
            raise InvalidParameter(f"duplicate dataset name {basename}")
    except KeyError:
        # Don't bother checking each sub path.
        pass

    dtype = self._TYPE_MAPPING.get(value.value_type)
    assert dtype is not None
    scaleoffset = None
    # Fix: np.float/np.int were deprecated aliases of the builtin float/int
    # and were removed in NumPy 1.24; the builtins compare identically on
    # older NumPy versions, so behavior is unchanged there.
    if dtype == float:
        scaleoffset = 4
    elif dtype == int:
        scaleoffset = 0
    attributes = {"type": dataset_property_type.value}
    timestamp_path = None
    if store_timestamp:
        timestamp_path = self.timestamp_path(path)
        # NOTE(review): the timestamp buffer inherits this value's
        # scaleoffset even though timestamps are always floats — confirm
        # that is intended for int-typed values (scaleoffset=0).
        self._timestamps = DatasetBuffer(
            hdf_store,
            timestamp_path,
            max_size,
            float,
            ["Timestamp"],
            scaleoffset=scaleoffset,
            max_chunk_bytes=max_chunk_bytes,
            attributes={"type": DatasetPropertyType.TIMESTAMP.value},
        )
        attributes["timestamp_path"] = timestamp_path
    else:
        self._timestamps = None
    self._dataset = DatasetBuffer(
        hdf_store,
        path,
        max_size,
        dtype,
        value.make_columns(),
        scaleoffset=scaleoffset,
        max_chunk_bytes=max_chunk_bytes,
        attributes=attributes,
    )
def test_dataset_buffer__write_value():
    """Verify DatasetBuffer buffers writes, flushes to HDF5, and round-trips
    column metadata and values through to_dataframe."""
    filename = os.path.join(tempfile.gettempdir(), "store.h5")
    try:
        with h5py.File(filename, "w") as store:
            columns = ("1", "2", "3", "4")
            max_size = 2000
            # Fix: np.float was removed in NumPy 1.24; the builtin float is
            # the exact equivalent (np.float was an alias for it).
            dataset = DatasetBuffer(store, "data", max_size, float, columns)
            assert dataset._chunk_size == 1024
            for _ in range(max_size):
                data = np.ones(4)
                dataset.write_value(data)
            # Two full chunks were written, leaving the remainder buffered.
            assert dataset._buf_index == 2000 - dataset._chunk_size
            dataset.flush_data()
            assert dataset._buf_index == 0
        with h5py.File(filename, "r") as store:
            data = store["data"][:]
            assert len(data) == max_size
            assert [x for x in store["data"].attrs["columns"]] == list(columns)
            for i in range(max_size):
                for j in range(4):
                    assert data[i][j] == 1.0
            df = DatasetBuffer.to_dataframe(store["data"])
            assert isinstance(df, pd.DataFrame)
            assert len(df) == max_size
            assert df.iloc[0, 0] == 1.0
    finally:
        # Always remove the temp file, even on assertion failure.
        if os.path.exists(filename):
            os.remove(filename)
def test_dataset_buffer__max_num_bytes():
    """Verify max_num_bytes: 100 rows x 4 float64 columns x 8 bytes = 3200."""
    filename = os.path.join(tempfile.gettempdir(), "store.h5")
    try:
        with h5py.File(filename, "w") as store:
            columns = ("1", "2", "3", "4")
            # Fix: np.float was removed in NumPy 1.24; the builtin float is
            # the exact equivalent (np.float was an alias for it).
            dataset = DatasetBuffer(store, "data", 100, float, columns)
            assert dataset.max_num_bytes() == 3200
    finally:
        # Always remove the temp file, even on assertion failure.
        if os.path.exists(filename):
            os.remove(filename)
def __init__(self, values, hdf_store, path, max_size, elem_names,
             dataset_property_type, max_chunk_bytes=None, store_time_step=False):
    """Create the HDF5 dataset buffer backing values for multiple elements.

    Two layouts are supported:

    * ``store_time_step=True``: every element's value at every time step is
      its own row; a parallel ``Time``/``Name`` index dataset maps each row
      back to its time step and element. Column names use the placeholder
      ``AllNames`` since rows mix elements. Capacity is scaled by
      ``len(values)``.
    * ``store_time_step=False``: one wide row per time step with each
      element's columns side by side; ``column_ranges`` records each
      element's (start index, column count) slice of that row.

    Raises
    ------
    InvalidParameter
        If a dataset with the same name already exists in the group.
    """
    group_name = os.path.dirname(path)
    basename = os.path.basename(path)
    try:
        # EAFP: the group may not exist yet; only check for duplicates when it does.
        if basename in hdf_store[group_name]:
            raise InvalidParameter(f"duplicate dataset name {basename}")
    except KeyError:
        # Don't bother checking each sub path.
        pass

    # All values share one dtype; assumes the list is homogeneous — the
    # first entry is taken as representative.
    dtype = values[0].value_type
    scaleoffset = None
    # There is no np.float128 on Windows.
    if dtype in (float, np.float32, np.float64, np.longdouble):
        scaleoffset = 4
    time_step_path = None
    # One row per (time step, element) when indexing by time step.
    max_size = max_size * len(values) if store_time_step else max_size
    if store_time_step:
        # Store indices for time step and element.
        # Each row of this dataset corresponds to a row in the data.
        # This will be required to interpret the raw data.
        attributes = {"type": DatasetPropertyType.TIME_STEP.value}
        time_step_path = self.time_step_path(path)
        self._time_steps = DatasetBuffer(
            hdf_store,
            time_step_path,
            max_size,
            int,
            ["Time", "Name"],
            scaleoffset=0,
            max_chunk_bytes=max_chunk_bytes,
            attributes=attributes,
        )
        columns = []
        tmp_columns = values[0].make_columns()
        for column in tmp_columns:
            fields = column.split(ValueStorageBase.DELIMITER)
            # Replace the element-name field: rows mix elements here.
            fields[0] = "AllNames"
            columns.append(ValueStorageBase.DELIMITER.join(fields))
        column_ranges = [0, len(tmp_columns)]
    else:
        columns = []
        column_ranges = []
        col_index = 0
        for value in values:
            tmp_columns = value.make_columns()
            # (start index, column count) of this element's slice of the row.
            col_range = (col_index, len(tmp_columns))
            column_ranges.append(col_range)
            for column in tmp_columns:
                columns.append(column)
                col_index += 1
        self._time_steps = None
    attributes = {"type": dataset_property_type.value}
    if store_time_step:
        attributes["time_step_path"] = time_step_path
    self._dataset = DatasetBuffer(
        hdf_store,
        path,
        max_size,
        dtype,
        columns,
        scaleoffset=scaleoffset,
        max_chunk_bytes=max_chunk_bytes,
        attributes=attributes,
        names=elem_names,
        column_ranges_per_name=column_ranges,
    )