コード例 #1
0
ファイル: test_dataset_buffer.py プロジェクト: yuanzy97/PyDSS
def test_dataset_buffer__write_value():
    """Write ``max_size`` rows through a DatasetBuffer and verify the result.

    Checks the buffer index before and after the final flush, then re-opens
    the file and verifies the stored rows, the "columns" attribute, and the
    DataFrame conversion.
    """
    columns = ("1", "2", "3", "4")
    max_size = 2000

    # A private temporary directory avoids clashing with other runs that would
    # otherwise share a fixed "store.h5" in the system temp directory, and it
    # is removed automatically even when an assertion fails.
    with tempfile.TemporaryDirectory() as tmp_dir:
        filename = os.path.join(tmp_dir, "store.h5")

        with h5py.File(filename, "w") as store:
            # np.float was only an alias of the builtin float and was removed
            # in NumPy 1.24, so the builtin is passed directly.
            dataset = DatasetBuffer(store, "data", max_size, float, columns)
            assert dataset._chunk_size == 1024
            for _ in range(max_size):
                dataset.write_value(np.ones(4))
            # Rows beyond the chunk already written out are still buffered.
            assert dataset._buf_index == max_size - dataset._chunk_size
            dataset.flush_data()
            assert dataset._buf_index == 0

        with h5py.File(filename, "r") as store:
            data = store["data"][:]
            assert len(data) == max_size
            assert list(store["data"].attrs["columns"]) == list(columns)
            for i in range(max_size):
                for j in range(len(columns)):
                    assert data[i][j] == 1.0

            df = DatasetBuffer.to_dataframe(store["data"])
            assert isinstance(df, pd.DataFrame)
            assert len(df) == max_size
            assert df.iloc[0, 0] == 1.0
コード例 #2
0
ファイル: value_storage.py プロジェクト: yuanzy97/PyDSS
class ValueContainer:
    """Container for a sequence of instances of ValueStorageBase."""

    # These could potentially be reduced in bit lengths. Compression probably
    # makes that unnecessary.
    # The aliases np.float / np.int / np.complex were plain aliases of the
    # Python builtins and were removed in NumPy 1.24, so the builtins are used.
    _TYPE_MAPPING = {
        float: float,
        int: int,
        complex: complex,
    }

    def __init__(self,
                 value,
                 hdf_store,
                 path,
                 max_size,
                 dataset_property_type,
                 max_chunk_bytes=None,
                 store_timestamp=False):
        """Create the buffered dataset (and optional timestamp dataset).

        Parameters
        ----------
        value : ValueStorageBase
            Sample value; its ``value_type`` selects the dtype and its
            ``make_columns()`` result supplies the column names.
        hdf_store
            Store that is indexed by group name and passed to DatasetBuffer.
        path : str
            Path of the dataset inside the store.
        max_size : int
            Forwarded to DatasetBuffer.
        dataset_property_type
            Its ``value`` is stored in the dataset's "type" attribute.
        max_chunk_bytes : int | None
            Forwarded to DatasetBuffer.
        store_timestamp : bool
            If True, a second dataset is created at ``timestamp_path(path)``.

        Raises
        ------
        InvalidParameter
            Raised if a dataset with the same name already exists in the
            group or if ``value.value_type`` is not a supported type.

        """
        group_name = os.path.dirname(path)
        basename = os.path.basename(path)
        try:
            if basename in hdf_store[group_name].keys():
                raise InvalidParameter(f"duplicate dataset name {basename}")
        except KeyError:
            # Don't bother checking each sub path.
            pass

        dtype = self._TYPE_MAPPING.get(value.value_type)
        if dtype is None:
            # An explicit error survives ``python -O``, unlike an assert.
            raise InvalidParameter(
                f"unsupported value type {value.value_type}"
            )

        scaleoffset = None
        if dtype is float:
            scaleoffset = 4
        elif dtype is int:
            scaleoffset = 0
        attributes = {"type": dataset_property_type.value}
        timestamp_path = None

        if store_timestamp:
            timestamp_path = self.timestamp_path(path)
            # NOTE(review): the timestamp dataset is float but reuses the
            # scaleoffset chosen for the value dtype (0 for int, None for
            # complex) -- confirm this is intended.
            self._timestamps = DatasetBuffer(
                hdf_store,
                timestamp_path,
                max_size,
                float,
                ["Timestamp"],
                scaleoffset=scaleoffset,
                max_chunk_bytes=max_chunk_bytes,
                attributes={"type": DatasetPropertyType.TIMESTAMP.value},
            )
            attributes["timestamp_path"] = timestamp_path
        else:
            self._timestamps = None

        self._dataset = DatasetBuffer(
            hdf_store,
            path,
            max_size,
            dtype,
            value.make_columns(),
            scaleoffset=scaleoffset,
            max_chunk_bytes=max_chunk_bytes,
            attributes=attributes,
        )

    @staticmethod
    def timestamp_path(path):
        """Return the path of the timestamp dataset that accompanies ``path``."""
        return path + "Timestamp"

    def append(self, value, timestamp=None):
        """Append a value to the container.

        Parameters
        ----------
        value : ValueStorageBase
        timestamp : float | None
            Required when the container was created with store_timestamp=True.

        Raises
        ------
        InvalidParameter
            Raised if timestamps are being stored and ``timestamp`` is None.

        """
        # Validate before writing anything so that a missing timestamp cannot
        # leave the value dataset one row ahead of the timestamp dataset.
        if self._timestamps is not None and timestamp is None:
            raise InvalidParameter(
                "timestamp is required when timestamps are stored"
            )

        self._dataset.write_value(value.value)
        if self._timestamps is not None:
            self._timestamps.write_value(timestamp)

    def flush_data(self):
        """Flush any outstanding data to disk."""
        self._dataset.flush_data()
        if self._timestamps is not None:
            self._timestamps.flush_data()

    def max_num_bytes(self):
        """Return the maximum number of bytes the value dataset could hold.

        Returns
        -------
        int

        """
        return self._dataset.max_num_bytes()
コード例 #3
0
ファイル: value_storage.py プロジェクト: NREL/PyDSS
class ValueContainer:
    """Container for a sequence of instances of ValueStorageBase."""
    def __init__(self,
                 values,
                 hdf_store,
                 path,
                 max_size,
                 elem_names,
                 dataset_property_type,
                 max_chunk_bytes=None,
                 store_time_step=False):
        """Create the buffered dataset (and optional time-step dataset).

        Parameters
        ----------
        values : list
            list of ValueStorageBase; the first entry determines the dtype.
        hdf_store
            Store that is indexed by group name and passed to DatasetBuffer.
        path : str
            Path of the dataset inside the store.
        max_size : int
            Multiplied by ``len(values)`` when store_time_step is True.
        elem_names
            Forwarded to DatasetBuffer as ``names``.
        dataset_property_type
            Its ``value`` is stored in the dataset's "type" attribute.
        max_chunk_bytes : int | None
            Forwarded to DatasetBuffer.
        store_time_step : bool
            If True, a second dataset holding [time step, element index] rows
            is created at ``time_step_path(path)``.

        Raises
        ------
        InvalidParameter
            Raised if a dataset with the same name already exists in the group.

        """
        group_name = os.path.dirname(path)
        basename = os.path.basename(path)
        try:
            if basename in hdf_store[group_name]:
                raise InvalidParameter(f"duplicate dataset name {basename}")
        except KeyError:
            # Don't bother checking each sub path.
            pass

        dtype = values[0].value_type
        scaleoffset = None
        # There is no np.float128 on Windows.
        if dtype in (float, np.float32, np.float64, np.longdouble):
            scaleoffset = 4
        time_step_path = None
        if store_time_step:
            max_size = max_size * len(values)

        columns = []
        if store_time_step:
            # Store indices for time step and element.
            # Each row of this dataset corresponds to a row in the data.
            # This will be required to interpret the raw data.
            time_step_path = self.time_step_path(path)
            self._time_steps = DatasetBuffer(
                hdf_store,
                time_step_path,
                max_size,
                int,
                ["Time", "Name"],
                scaleoffset=0,
                max_chunk_bytes=max_chunk_bytes,
                attributes={"type": DatasetPropertyType.TIME_STEP.value},
            )
            # All elements share one set of columns, so the first field of
            # each column name is replaced with a generic label.
            delimiter = ValueStorageBase.DELIMITER
            tmp_columns = values[0].make_columns()
            for column in tmp_columns:
                fields = column.split(delimiter)
                fields[0] = "AllNames"
                columns.append(delimiter.join(fields))
            column_ranges = [0, len(tmp_columns)]
        else:
            column_ranges = []
            for value in values:
                tmp_columns = value.make_columns()
                # (index of the first column, number of columns) per value.
                column_ranges.append((len(columns), len(tmp_columns)))
                columns.extend(tmp_columns)
            self._time_steps = None

        attributes = {"type": dataset_property_type.value}
        if store_time_step:
            attributes["time_step_path"] = time_step_path

        self._dataset = DatasetBuffer(
            hdf_store,
            path,
            max_size,
            dtype,
            columns,
            scaleoffset=scaleoffset,
            max_chunk_bytes=max_chunk_bytes,
            attributes=attributes,
            names=elem_names,
            column_ranges_per_name=column_ranges,
        )

    @staticmethod
    def time_step_path(path):
        """Return the path of the time-step dataset that accompanies ``path``."""
        return path + "TimeStep"

    def append(self, values):
        """Append one row built from a list of values to the container.

        Parameters
        ----------
        values : list
            list of ValueStorageBase

        """
        if isinstance(values[0].value, list):
            # Each value holds a list; flatten them into a single row.
            vals = [x for y in values for x in y.value]
        else:
            vals = [x.value for x in values]

        self._dataset.write_value(vals)

    def append_by_time_step(self, value, time_step, elem_index):
        """Append a value to the container along with its time step.

        Parameters
        ----------
        value : ValueStorageBase
        time_step : int
        elem_index : int

        Raises
        ------
        InvalidParameter
            Raised if the container was created without store_time_step=True.

        """
        # Check before writing: otherwise the data row would be written and
        # the call would then fail on the missing time-step buffer, leaving
        # the two datasets out of step.
        if self._time_steps is None:
            raise InvalidParameter(
                "append_by_time_step requires store_time_step=True"
            )

        if isinstance(value.value, list):
            vals = list(value.value)
        else:
            vals = value.value

        self._dataset.write_value(vals)
        self._time_steps.write_value([time_step, elem_index])

    def flush_data(self):
        """Flush any outstanding data to disk."""
        self._dataset.flush_data()
        if self._time_steps is not None:
            self._time_steps.flush_data()

    def max_num_bytes(self):
        """Return the maximum number of bytes the value dataset could hold.

        Returns
        -------
        int

        """
        return self._dataset.max_num_bytes()