Example #1
0
    def _parse_datasets(self):
        """Index the stored datasets of every element class.

        For each class in ``self._elem_classes`` the datasets under the class
        group's ``ElementProperties`` are inspected. For every dataset typed
        ``VALUE``, ``PER_TIME_POINT`` or ``FILTERED`` this records the
        property name, the element names it contains, the position of each
        name in the dataset and the dataset's column ranges. ``VALUE``
        properties are tracked in ``self._elem_values_by_prop``; the other two
        types are tracked in ``self._elem_data_by_prop``. Datasets of any
        other type, including ``TIME_STEP``, are skipped.

        Datasets under ``SummedElementProperties`` are handled afterwards:
        ``VALUE`` datasets are stored as a ``{column: value}`` dict and
        ``PER_TIME_POINT`` datasets only have their property name recorded.

        """
        for elem_class in self._elem_classes:
            class_group = self._group[elem_class]

            if "ElementProperties" not in class_group:
                # Nothing was stored per element for this class.
                self._elems_by_class[elem_class] = set()
            else:
                element_props = class_group["ElementProperties"]
                for prop, dataset in element_props.items():
                    kind = get_dataset_property_type(dataset)
                    if kind == DatasetPropertyType.VALUE:
                        names_by_prop = self._elem_values_by_prop
                    elif kind in (
                            DatasetPropertyType.PER_TIME_POINT,
                            DatasetPropertyType.FILTERED,
                    ):
                        names_by_prop = self._elem_data_by_prop
                    else:
                        # TIME_STEP datasets and any other type are not
                        # indexed here.
                        continue

                    names_by_prop[elem_class][prop] = []
                    self._props_by_class[elem_class].append(prop)
                    self._elem_indices_by_prop[elem_class][prop] = {}
                    names = DatasetBuffer.get_names(dataset)
                    column_ranges = DatasetBuffer.get_column_ranges(dataset)
                    self._column_ranges_per_elem[elem_class][prop] = \
                        column_ranges

                    elem_names = names_by_prop[elem_class][prop]
                    index_by_name = \
                        self._elem_indices_by_prop[elem_class][prop]
                    for position, name in enumerate(names):
                        self._elems_by_class[elem_class].add(name)
                        elem_names.append(name)
                        index_by_name[name] = position
                        self._elem_props[name].append(prop)

            # An absent group falls back to an empty list, so the loop below
            # simply does not run.
            summed_group = class_group.get("SummedElementProperties", [])
            for prop in summed_group:
                dataset = summed_group[prop]
                kind = get_dataset_property_type(dataset)
                if kind == DatasetPropertyType.VALUE:
                    df = DatasetBuffer.to_dataframe(dataset)
                    assert len(df) == 1
                    # Keep the single row as a column -> value mapping.
                    totals = {}
                    for column in df.columns:
                        totals[column] = df[column].values[0]
                    self._summed_elem_props[elem_class][prop] = totals
                elif kind == DatasetPropertyType.PER_TIME_POINT:
                    self._summed_elem_timeseries_props[elem_class].append(prop)
Example #2
0
    def get_filtered_dataframes(self,
                                element_class,
                                prop,
                                real_only=False,
                                abs_val=False):
        """Return the dataframes for all elements.

        Calling this is much more efficient than calling get_dataframe for each
        element.

        Parameters
        ----------
        element_class : str
        prop : str
        real_only : bool
            If dtype of any column is complex, drop the imaginary component.
            Takes precedence over abs_val when both are set.
        abs_val : bool
            If dtype of any column is complex, compute its absolute value.

        Returns
        -------
        dict
            key = str (name), val = pd.DataFrame
            Each dataframe holds the rows stored for that element, indexed by
            the matching first-column values of the indices dataframe
            (presumably timestamps). Elements with no stored rows are omitted.
            The dict will be empty if no data was stored.

        Raises
        ------
        AssertionError
            Raised if the dataset and its time step dataset report different
            lengths.

        """
        if prop not in self.list_element_properties(element_class):
            logger.debug("%s/%s is not stored", element_class, prop)
            return {}

        dataset = self._group[f"{element_class}/ElementProperties/{prop}"]
        columns = DatasetBuffer.get_columns(dataset)
        names = DatasetBuffer.get_names(dataset)
        length = dataset.attrs["length"]
        indices_df = self._get_indices_df()
        data_vals = dataset[:length]

        # The time_step_dataset has these columns:
        # 1. time step index
        # 2. element index
        # Each row describes the source data in the dataset row.
        path = dataset.attrs["time_step_path"]
        time_step_dataset = self._hdf_store[path]
        time_step_length = time_step_dataset.attrs["length"]
        assert length == time_step_length, (
            f"{element_class}/{prop}: dataset length {length} does not match "
            f"time step dataset length {time_step_length}"
        )
        if length == 0:
            # No rows were stored, so there is nothing to group.
            return {}
        time_step_data = time_step_dataset[:length]

        # Slice each column once up front instead of once per row.
        ts_indices = time_step_data[:, 0]
        elem_indices = time_step_data[:, 1]
        # TODO DT: more than one column?
        first_column = data_vals[:, 0]

        # Group the values, and the index entry of each value, by element.
        elem_data = defaultdict(list)
        elem_timestamps = defaultdict(list)
        for ts_index, elem_index, val in zip(ts_indices, elem_indices,
                                             first_column):
            if real_only:
                val = val.real
            elif abs_val:
                val = abs(val)
            elem_data[elem_index].append(val)
            elem_timestamps[elem_index].append(indices_df.iloc[ts_index, 0])

        dfs = {}
        for elem_index, vals in elem_data.items():
            elem_name = names[elem_index]
            cols = self._fix_columns(elem_name, columns)
            dfs[elem_name] = pd.DataFrame(
                vals,
                columns=cols,
                index=elem_timestamps[elem_index],
            )
        return dfs