예제 #1
0
def test_dataset_buffer__write_value():
    """Write rows one at a time through a DatasetBuffer and read them back.

    Checks the chunk count chosen for ``max_chunk_bytes``, the buffer index
    before and after ``flush_data``, and that the stored rows, column names
    and DataFrame conversion match what was written.
    """
    columns = ("1", "2", "3", "4")
    max_size = 5000

    # Use a private temporary directory instead of a fixed file name in the
    # shared temp dir: this avoids clashes with a stale "store.h5" or with
    # another test run, and the directory is removed on exit even when an
    # assertion fails.
    with tempfile.TemporaryDirectory() as tmp_dir:
        filename = os.path.join(tmp_dir, "store.h5")

        with h5py.File(filename, "w") as store:
            dataset = DatasetBuffer(store, "data", max_size, float, columns,
                                    max_chunk_bytes=128 * 1024)
            assert dataset.chunk_count == 4096
            for _ in range(max_size):
                dataset.write_value(np.ones(4))
            # Before the explicit flush, only the rows beyond one chunk are
            # expected to remain in the buffer.
            assert dataset._buf_index == max_size - dataset.chunk_count
            dataset.flush_data()
            assert dataset._buf_index == 0

        with h5py.File(filename, "r") as store:
            data = store["data"][:]
            assert len(data) == max_size
            actual_columns = DatasetBuffer.get_columns(store["data"])
            assert list(actual_columns) == list(columns)
            for row in data:
                for j in range(4):
                    assert row[j] == 1.0

            df = DatasetBuffer.to_dataframe(store["data"])
            assert isinstance(df, pd.DataFrame)
            assert len(df) == max_size
            assert df.iloc[0, 0] == 1.0
예제 #2
0
    def _get_filtered_dataframe(self,
                                elem_class,
                                prop,
                                name,
                                dataset,
                                real_only=False,
                                abs_val=False,
                                **kwargs):
        """Return a DataFrame holding the stored values of one element.

        A row of ``dataset`` is kept only when the matching row of the
        time-step dataset carries the element index registered for ``name``.
        Only the first column of ``dataset`` is read.

        Parameters
        ----------
        elem_class
            Key into ``self._elem_indices_by_prop``.
        prop
            Property key under ``elem_class``.
        name
            Element name; used to look up the element index and passed to
            ``self._fix_columns``.
        dataset
            Sliceable dataset with "length" and "time_step_path" entries in
            its ``attrs``.
        real_only : bool
            Keep only the real component of each value.
        abs_val : bool
            Use the absolute value of each value. Ignored when ``real_only``
            is true.
        **kwargs
            Accepted and ignored.

        Returns
        -------
        pd.DataFrame
            One row per matching entry, indexed by the first-column value of
            ``self._get_indices_df()`` at the entry's time step index.

        """
        indices_df = self._get_indices_df()
        wanted_index = self._elem_indices_by_prop[elem_class][prop][name]
        num_rows = dataset.attrs["length"]
        values = dataset[:num_rows]

        # Each row of the time-step dataset describes the same row of
        # ``dataset``: column 0 is the time step index and column 1 is the
        # element index.
        ts_path = dataset.attrs["time_step_path"]
        ts_rows = self._hdf_store[ts_path][:num_rows]

        assert num_rows == self._hdf_store[ts_path].attrs["length"]

        selected = []
        row_labels = []
        for row in range(num_rows):
            # Skip rows that belong to other elements.
            if ts_rows[row, 1] != wanted_index:
                continue
            # TODO DT: more than one column?
            item = values[row, 0]
            # TODO: profile this vs a df operation at end
            if real_only:
                item = item.real
            elif abs_val:
                item = abs(item)
            selected.append(item)
            row_labels.append(indices_df.iloc[ts_rows[row, 0], 0])

        fixed_columns = self._fix_columns(
            name, DatasetBuffer.get_columns(dataset)
        )
        return pd.DataFrame(selected, columns=fixed_columns, index=row_labels)
예제 #3
0
    def get_filtered_dataframes(self,
                                element_class,
                                prop,
                                real_only=False,
                                abs_val=False):
        """Return the dataframes for all elements.

        Intended as a faster alternative to calling get_dataframe once per
        element: the datasets are read once and the rows are grouped by
        element index in a single pass. Only the first column of the stored
        data is read.

        Parameters
        ----------
        element_class : str
        prop : str
        real_only : bool
            Keep only the real component of each value.
        abs_val : bool
            Use the absolute value of each value. Ignored when real_only is
            true.

        Returns
        -------
        dict
            key = str (name), val = pd.DataFrame
            The dict will be empty if no data was stored.

        """
        if prop not in self.list_element_properties(element_class):
            logger.debug("%s/%s is not stored", element_class, prop)
            return {}

        dataset = self._group[f"{element_class}/ElementProperties/{prop}"]
        columns = DatasetBuffer.get_columns(dataset)
        names = DatasetBuffer.get_names(dataset)
        length = dataset.attrs["length"]
        indices_df = self._get_indices_df()
        data_vals = dataset[:length]

        # The time_step_dataset has these columns:
        # 1. time step index
        # 2. element index
        # Each row describes the source data in the dataset row.
        path = dataset.attrs["time_step_path"]
        assert length == self._hdf_store[path].attrs["length"]
        time_step_data = self._hdf_store[path][:length]

        # Group values and their index labels by element index.
        elem_data = defaultdict(list)
        elem_timestamps = defaultdict(list)
        for i in range(length):
            ts_index = time_step_data[i, 0]
            elem_index = time_step_data[i, 1]
            # TODO DT: more than one column?
            val = data_vals[i, 0]
            if real_only:
                val = val.real
            elif abs_val:
                val = abs(val)
            elem_data[elem_index].append(val)
            elem_timestamps[elem_index].append(indices_df.iloc[ts_index, 0])

        # Build one DataFrame per element that has at least one stored row.
        dfs = {}
        for elem_index, vals in elem_data.items():
            elem_name = names[elem_index]
            dfs[elem_name] = pd.DataFrame(
                vals,
                columns=self._fix_columns(elem_name, columns),
                index=elem_timestamps[elem_index],
            )
        return dfs