Beispiel #1
0
def _add_field_factory(field_type: str) -> Callable:
    """Build a function that adds a field of the given type to a dataset

    The returned function takes ``self`` as its first argument and is meant to be
    attached to a class whose instances provide ``_fields``, ``num_obs`` and
    ``add_collection``.

    Args:
        field_type:  Name of the field type. It is passed to ``fieldtypes.function``
                     to look up the callable that constructs the field.

    Returns:
        Function that creates a field of type ``field_type`` and stores it on ``self``.
    """
    func = fieldtypes.function(field_type)

    def _add_field(self,
                   name,
                   val=None,
                   unit=None,
                   write_level=None,
                   suffix=None,
                   **field_args):
        """Add a {field_type} field to the dataset

        Args:
            name:         Name of the field. A dotted name places the field in the
                          collection named by the part before the last dot; that
                          collection is created first if it does not exist.
            val:          Forwarded unchanged to the field constructor.
            unit:         Forwarded unchanged to the field constructor.
            write_level:  Forwarded unchanged to the field constructor.
            suffix:       Accepted but not used by this function.
            field_args:   Extra keyword arguments forwarded to the field constructor.

        Raises:
            FieldExistsError: If a field called ``name`` is already in the dataset.
        """
        if name in self._fields:
            raise exceptions.FieldExistsError(
                f"Field {name!r} already exists in dataset")

        # Create collections for nested fields
        collection, _, field_name = name.rpartition(".")
        if collection and collection not in self._fields:
            self.add_collection(collection)

        # Create field
        field = func(num_obs=self.num_obs,
                     name=field_name,
                     val=val,
                     unit=unit,
                     write_level=write_level,
                     **field_args)

        # Add field to list of fields, either on the collection or on the dataset itself
        fields = getattr(self, collection) if collection else self._fields
        fields[field_name] = field

    # Docstrings are stripped (set to None) when Python runs with -OO, so only
    # fill in the template when a docstring is actually present
    if _add_field.__doc__ is not None:
        _add_field.__doc__ = _add_field.__doc__.format(field_type=field_type)

    return _add_field
Beispiel #2
0
    def read(cls, file_path: Union[str, pathlib.Path]) -> "Dataset":
        """Create a dataset from the contents of an HDF5 file

        Args:
            file_path:  Path to the file that is opened for reading.

        Returns:
            Dataset holding the variables, fields and meta information found in the file.
        """
        log.debug(f"Read dataset from {file_path}")

        # Keeps track of references in the data structure while reading.
        # Maps field name -> data object of the field (TimeArray, PositionArray, etc)
        memo = {}

        with h5py.File(file_path, mode="r") as h5_file:
            attrs = h5_file.attrs

            # Set up an empty dataset of the right size and restore its variables
            dset = cls(num_obs=attrs["num_obs"])
            stored_vars = _h5utils.decode_h5attr(attrs["vars"])
            dset.vars.update(stored_vars)

            # Each field type knows how to read itself from its own part of the file
            stored_fields = _h5utils.decode_h5attr(attrs["fields"])
            for fieldname, fieldtype in stored_fields.items():
                read_field = fieldtypes.function(fieldtype).read
                field = read_field(h5_file[fieldname], memo)
                dset._fields[fieldname] = field
                memo[fieldname] = field.data

            # Meta information is stored under a dedicated key
            dset.meta.read(h5_file["__meta__"])

        return dset
Beispiel #3
0
    def read(cls, h5_group, memo):
        """Rebuild a field, including the fields nested inside its data, from an HDF5 group

        Args:
            h5_group:  Group to read from. Its attributes provide the name of the field
                       ("fieldname") and the names and types of the nested fields ("fields").
            memo:      Passed on unchanged to the read function of every nested field.

        Returns:
            The field, with all nested fields stored in ``field.data._fields``.
        """
        # num_obs and val not used
        field = cls(num_obs=None, name=h5_group.attrs["fieldname"], val=None)

        nested_types = _h5utils.decode_h5attr(h5_group.attrs["fields"])
        for nested_name, nested_type in nested_types.items():
            read_nested = fieldtypes.function(nested_type).read
            field.data._fields[nested_name] = read_nested(h5_group[nested_name], memo)

        return field
Beispiel #4
0
    def difference(self,
                   other,
                   index_by=None,
                   copy_self_on_error=False,
                   copy_other_on_error=False):
        """Subtract another dataset from this one, field by field: self - other

        The fields listed in index_by are used to match rows between the two datasets. They
        are excluded from the - operation; their values are copied from self to the result.

        Args:
            other:               Dataset to subtract from self.
            index_by:            Comma separated text string with names of the fields
                                 (columns) that are used to find common elements (rows).
                                 If None, all rows of both datasets are used, and the
                                 datasets must have the same number of observations.
            copy_self_on_error:  Copy value of fields in self to the difference
                                 dataset if the - operation fails for a field.
            copy_other_on_error: Copy value of fields in other to the difference
                                 dataset if the - operation fails for a field.

        Returns:
            A new dataset with fields that contain the difference between fields in self and other.

        Raises:
            ValueError: If index_by is None and the datasets differ in length, or if
                        there are no observations to compute a difference for.
        """
        # Parse the names of the index fields once, so they can be reused below
        key_names = None if index_by is None else [key.strip() for key in index_by.split(",")]

        if key_names is None:
            if len(self) != len(other):
                raise ValueError(
                    f"Cannot compute difference between datasets with different number of observations ({self.num_obs} vs {other.num_obs})"
                )
            # Boolean masks that select every row of each dataset
            num_obs = len(self)
            self_idx = np.ones(num_obs, dtype=bool)
            other_idx = np.ones(len(other), dtype=bool)
        else:
            # Combine the index fields into record arrays, so that rows are compared as a whole
            self_columns = [self[key] for key in key_names]
            other_columns = [other[key] for key in key_names]
            common, self_idx, other_idx = np.intersect1d(
                np.rec.fromarrays(self_columns),
                np.rec.fromarrays(other_columns),
                return_indices=True,
            )
            num_obs = len(common)

        if num_obs == 0:
            raise ValueError(
                f"Nothing to differentiate. No common data found for chosen option index_by '{index_by}'."
            )

        result = self._difference(
            other,
            num_obs,
            self_idx,
            other_idx,
            copy_self_on_error=copy_self_on_error,
            copy_other_on_error=copy_other_on_error,
        )

        if key_names is None:
            return result

        # Replace the difference of each index field with the original values from self
        for key in key_names:
            try:
                del result[key]
            except AttributeError:
                # Field does not exist, so there is nothing to delete
                pass

            key_values = self[key][self_idx]
            make_field = fieldtypes.function(fieldtypes.fieldtype(key_values))
            source_field = self.field(key)
            result._fields[key] = make_field(
                num_obs=num_obs,
                name=key,
                val=key_values,
                unit=source_field._unit,
                write_level=source_field._write_level.name,
            )

        return result
Beispiel #5
0
    def _difference(self, other, num_obs, self_idx, other_idx, copy_self_on_error=False, copy_other_on_error=False):
        """Perform the - operation for each field that is present in both self and other

        Fields that exist in only one of the two are skipped. If the - operation raises a
        TypeError for a field, the field is left out of the result unless one of the
        copy_*_on_error options asks for the original values to be kept.

        Args:
            other:               Object with fields to subtract from the fields of self.
            num_obs:             Number of observations, passed on to every field that is created.
            self_idx:            Index applied to each field of self before subtracting.
            other_idx:           Index applied to each field of other before subtracting.
            copy_self_on_error:  On TypeError, add the indexed values from self as `<fieldname>_self`.
            copy_other_on_error: On TypeError, add the indexed values from other as `<fieldname>_other`.

        Returns:
            New instance of the same class as self holding the computed fields.

        Raises:
            ValueError: If setting up the unit factors for a field raises a UnitError.
        """
        result = self.__class__()

        def _add_result_field(name, val, template):
            """Wrap val in a field of the matching type and add it to result

            Unit and write level are taken from the template field.
            """
            func = fieldtypes.function(fieldtypes.fieldtype(val))
            new_field = func(
                num_obs=num_obs,
                name=name,
                val=val,
                unit=template._unit,
                write_level=template._write_level.name,
            )
            result.add_field(name, new_field)

        for fieldname, self_field in self._fields.items():
            if fieldname not in other._fields:
                continue

            # Keep stable references to the original fields. They are never rebound, so the
            # error handlers below always read unit and write level from the originals.
            other_field = other._fields[fieldname]

            # One factor per pair of units; the values of other are scaled by these below
            # (presumably converting from the units of other to the units of self)
            try:
                factors = [Unit(_from, _to) for _to, _from in zip(self_field._unit, other_field._unit)]
            except TypeError:
                # zip could not iterate over the units; subtract without scaling
                factors = None
            except exceptions.UnitError as err:
                raise ValueError(f"Cannot compute difference for field `{fieldname}`: {err}") from err

            try:
                self_values = self[fieldname][self_idx]
                other_values = other[fieldname][other_idx]
                if factors:
                    # Not done in place, so the data of other is left untouched
                    other_values = other_values * np.array(factors)
                _add_result_field(fieldname, self_values - other_values, self_field)
            except IndexError:
                # fieldname is a collection: compute the difference of its fields recursively
                collection = self[fieldname]._difference(
                    other[fieldname],
                    num_obs,
                    self_idx,
                    other_idx,
                    copy_self_on_error=copy_self_on_error,
                    copy_other_on_error=copy_other_on_error,
                )
                _add_result_field(fieldname, collection, self_field)
            except TypeError:
                # Fields that do not support the - operator
                if copy_self_on_error:
                    _add_result_field(f"{fieldname}_self", self[fieldname][self_idx], self_field)
                if copy_other_on_error:
                    _add_result_field(f"{fieldname}_other", other[fieldname][other_idx], other_field)

        return result