Example #1
0
    def add_classifier(self, hfile):
        """Create the ``/classifier`` group in ``hfile`` and fill it.

        Writes the ``weights`` and ``intercept`` datasets, then an ``info``
        subgroup holding the class name, subject, JSON-encoded parameters
        and the ``roc`` / ``auc`` datasets. When ``self.roc`` or
        ``self.auc`` is ``None``, an ``h5py.Empty`` float64 dataset is
        written in its place.
        """
        cgroup = hfile.create_group('/classifier')

        cgroup.create_dataset('weights',
                              data=self.weights,
                              **self.__compression)
        cgroup.create_dataset('intercept', data=[self.classifier.intercept_])

        info_group = cgroup.create_group('info')
        addstring = partial(self.addstring, info_group)
        addstring('classname', self.classname)
        addstring('subject', self.subject)

        # The fixed-width string dtype is sized to the JSON text.
        params = json.dumps(self.params)
        addstring('params', params, dtype='|S{:d}'.format(len(params)))

        # An Empty object is data, not a dtype: pass it as ``data`` (the
        # documented h5py way to create an empty dataset).
        if self.roc is None:
            info_group.create_dataset('roc', data=h5py.Empty('<f8'))
        else:
            info_group.create_dataset('roc',
                                      data=self.roc,
                                      **self.__compression)

        if self.auc is None:
            info_group.create_dataset('auc', data=h5py.Empty('<f8'))
        else:
            info_group.create_dataset('auc',
                                      data=[self.auc],
                                      **self.__compression)
Example #2
0
    def test_empty(self):
        """Attributes created from an h5py.Empty value read back as equal.

        See https://github.com/h5py/h5py/issues/1540
        """
        # The second pass overwrites the attribute written by the first.
        for dtype_spec in ('f', None):
            self.f.attrs.create('empty', h5py.Empty(dtype_spec))
            self.assertEqual(self.f.attrs['empty'], h5py.Empty(dtype_spec))
Example #3
0
    def _save_run_results_hdf(outfile, results):
        """Write the results of a run to the HDF5 file ``outfile``.

        ``results`` is a mapping read with the keys ``version``,
        ``model_id``, ``timestamp``, ``output_classes``, ``output_scores``,
        ``class_labels`` and ``bin_id``, plus ``roi_numbers`` when
        ``bin_id`` is truthy or ``input_images`` otherwise.

        Scalar metadata is stored as attributes on an empty ``metadata``
        dataset; everything else becomes a gzip-compressed dataset.
        """
        with h5.File(outfile, 'w') as f:
            meta = f.create_dataset('metadata', data=h5.Empty('f'))
            meta.attrs['version'] = results['version']
            meta.attrs['model_id'] = results['model_id']
            meta.attrs['timestamp'] = results['timestamp']
            f.create_dataset('output_classes',
                             data=results['output_classes'],
                             compression='gzip',
                             dtype='float16')
            f.create_dataset('output_scores',
                             data=results['output_scores'],
                             compression='gzip',
                             dtype='float16')
            # np.bytes_ is the same type as the np.string_ alias that was
            # removed in NumPy 2.0, and exists on older releases too.
            f.create_dataset('class_labels',
                             data=np.bytes_(results['class_labels']),
                             compression='gzip',
                             dtype=h5.string_dtype())
            if results['bin_id']:
                meta.attrs['bin_id'] = results['bin_id']
                f.create_dataset('roi_numbers',
                                 data=results['roi_numbers'],
                                 compression='gzip',
                                 dtype='uint16')
            else:
                f.create_dataset('input_images',
                                 data=np.bytes_(results['input_images']),
                                 compression='gzip',
                                 dtype=h5.string_dtype())
Example #4
0
def _write_to_group(group, dictionary):
    """ writes a dictionary to a hdf5 group, which can recurse"""
    for key in dictionary.keys():
        value = dictionary[key]
        if isinstance(value, str):
            # needs another conversion to string to catch weird subtypes
            # like numpy.str_
            group.attrs[key] = str(value)
        elif isinstance(value, np.ndarray):
            if str(value.dtype)[:2] == '<U':
                group[key] = value.astype('S')
            else:
                group[key] = value
        elif isinstance(value, list):
            _write_list(group, key, value)
        elif isinstance(value, dict):
            subgroup = group.create_group(key)
            _write_to_group(subgroup, value)
        elif value is None:
            group[key] = h5py.Empty("f")
        elif isinstance(value, Iterable):
            if isinstance(value[0], str):
                group.attrs[key] = value
        else:
            group[key] = value
Example #5
0
    def setUp(self):
        """Create the ``test_file.h5`` fixture inside the notebook directory.

        The file holds an empty group, a group with a dataset and an
        attributed subgroup, a scalar dataset and an empty dataset.
        """
        super().setUp()

        fixture_path = os.path.join(self.notebook_dir, "test_file.h5")
        with h5py.File(fixture_path, "w") as h5file:
            h5file.create_group("empty_group")

            # Parent group: one plain dataset plus one child group that
            # only carries attributes.
            parent = h5file.create_group("group_with_children")
            parent["dataset_1"] = np.random.random((2, 3, 4))
            attr_grp = parent.create_group("group_with_attrs")
            for attr_name, attr_value in (
                ("array_attr", np.arange(0, 1, 0.1, dtype=">f4")),
                ("bool_attr", True),
                ("complex_attr", np.complex64(1 + 2j)),
                ("number_attr", np.int32(5676)),
                ("string_attr", "I am a group"),
            ):
                attr_grp.attrs[attr_name] = attr_value

            # Top-level scalar and empty datasets
            h5file["scalar"] = 56
            h5file["empty"] = h5py.Empty(">f8")
Example #6
0
def createHdfTestData():
    """Write the test file ``empty.h5`` using the h5py module.

    The file contains an empty dataset and a regular dataset that carries
    an empty attribute.
    """
    import h5py

    # Empty datasets
    # See https://docs.h5py.org/en/latest/high/dataset.html#creating-and-reading-empty-or-null-datasets-and-attributes
    # and https://github.com/h5py/h5py/issues/279#issuecomment-15313062 for a possible use case
    with h5py.File('empty.h5', 'w') as h5Root:
        emptyDs = h5Root.create_dataset("emptyDataset",
                                        data=h5py.Empty("f"))
        emptyDs.attrs['Description'] = 'An empty dataset'

        # Empty attributes were broken in h5py 2.10 and fixed in 3.0,
        # see https://github.com/h5py/h5py/issues/1540
        regularDs = h5Root.create_dataset("hasEmptyAttribute",
                                          data=np.arange(5))
        regularDs.attrs['Description'] = 'An regular dataset with an empty attribute'
        regularDs.attrs["emptyAttr"] = h5py.Empty(np.float32)
Example #7
0
 def _save_validation_results_hdf(self, outfile, results):
     """Write validation ``results`` to the HDF5 file ``outfile``.

     Each entry of ``results`` is routed by its key:

     * identifiers and aggregate scores: attributes of an empty
       ``metadata`` dataset;
     * label and path series: gzip-compressed string datasets;
     * class and count series: gzip-compressed ``int16`` datasets;
     * any other ``numpy.ndarray``: a gzip-compressed ``float16`` dataset.

     Raises:
         UserWarning: if an entry matches none of the cases above.
     """
     attrib_data = ['model_id', 'timestamp']
     attrib_data += 'f1_weighted recall_weighted precision_weighted f1_macro recall_macro precision_macro'.split(
     )
     int_data = [
         'input_classes', 'output_classes'
     ] + 'counts_perclass val_counts_perclass train_counts_perclass'.split(
     )
     int_data.extend([
         'classes_by_' + stat
         for stat in 'f1 recall precision count'.split()
     ])
     string_data = ['class_labels', 'image_fullpaths', 'image_basenames']
     with h5.File(outfile, 'w') as f:
         meta = f.create_dataset('metadata', data=h5.Empty('f'))
         for series in results:
             if series in attrib_data:
                 meta.attrs[series] = results[series]
             elif series in string_data:
                 # np.bytes_ is the same type as the np.string_ alias
                 # that was removed in NumPy 2.0.
                 f.create_dataset(series,
                                  data=np.bytes_(results[series]),
                                  compression='gzip',
                                  dtype=h5.string_dtype())
             elif series in int_data:
                 f.create_dataset(series,
                                  data=results[series],
                                  compression='gzip',
                                  dtype='int16')
             elif isinstance(results[series], np.ndarray):
                 f.create_dataset(series,
                                  data=results[series],
                                  compression='gzip',
                                  dtype='float16')
             else:
                 raise UserWarning(
                     'hdf results: WE MISSED THIS ONE: {}'.format(series))
Example #8
0
 def setUp(self):
     """Open a fresh file with an empty ``S10`` dataset ``x`` and keep
     the matching ``h5py.Empty`` object for comparisons."""
     TestCase.setUp(self)
     path = self.getFileName("dataset_testempty")
     print("filename:", path)
     self.f = h5py.File(path, 'w')
     # No shape and no data are given, only a dtype.
     self.dset = self.f.create_dataset('x', dtype='S10')
     self.empty_obj = h5py.Empty(np.dtype("S10"))
Example #9
0
    def create_attr(self,
                    name: str,
                    value: Optional[Any],
                    log: Optional[Logger] = None):
        """Adds an attribute to the current object.

        WARNING: Any existing attribute will be overwritten!

        This method will coerce value to a special 'Empty' type used by HDF5 if the value
        provided is zero-length or None. For more on Attributes and Empty types, see [1, 2]

        [1] - https://docs.h5py.org/en/stable/high/attr.html#attributes
        [2] - https://docs.h5py.org/en/stable/high/dataset.html?highlight=Empty#creating-and-reading-empty-or-null-datasets-and-attributes

        Parameters
        ----------
        name : str
            Name of the attribute.
        value : Optional[Any]
            Value of the attribute. This method will coerce this value
            to a special Empty object if it's zero-length or None [2].
            Instances of ``HDF5IsAttr`` are stored through their
            ``as_attr()`` representation.
        log : Optional[Logger]
            Not used by this method.
        """

        # The length check already covers the empty string. Comparing with
        # ``== ""`` would be element-wise on array values and make the
        # truth test raise.
        is_empty = value is None or (hasattr(value, "__len__")
                                     and len(value) < 1)
        if is_empty:
            empty = h5py.Empty(dtype=np.uint8)
            self.get_attrs().create(name, empty)
        elif isinstance(value, HDF5IsAttr):
            # Store the converted representation, whose dtype is the one
            # passed alongside it.
            attr_value = value.as_attr()
            self.get_attrs().create(name,
                                    attr_value,
                                    dtype=hdf5_dtype(attr_value))
        else:
            self.get_attrs().create(name, value, dtype=hdf5_dtype(value))
Example #10
0
 def setUp(self):
     """Create attribute ``x`` on the file through the low-level API,
     using a null dataspace and a 10-byte C string type."""
     BaseAttrs.setUp(self)
     null_space = h5s.create(h5s.NULL)
     string_type = h5t.C_S1.copy()
     string_type.set_size(10)
     attr_id = h5a.create(self.f.id, b'x', string_type, null_space)
     # Reference value for comparisons.
     self.empty_obj = h5py.Empty(np.dtype("S10"))
Example #11
0
 def setUp(self):
     """Create dataset ``x`` through the low-level API, using a null
     dataspace and a 10-byte C string type, and wrap it in a Dataset."""
     TestCase.setUp(self)
     null_space = h5py.h5s.create(h5py.h5s.NULL)
     string_type = h5py.h5t.C_S1.copy()
     string_type.set_size(10)
     dataset_id = h5py.h5d.create(self.f.id, b'x', string_type, null_space)
     self.dset = h5py.Dataset(dataset_id)
     # Reference value for comparisons.
     self.empty_obj = h5py.Empty(np.dtype("S10"))
Example #12
0
    def add_training(self, hfile):
        """Add training data under ``/classifier/training`` in ``hfile``.

        Writes the ``events`` and ``sample_weight`` datasets. When the
        corresponding attribute on ``self`` is ``None``, an ``h5py.Empty``
        float dataset is written in its place.
        """
        group = hfile.create_group('/classifier/training')

        # An Empty object is data, not a dtype: pass it as ``data`` (the
        # documented h5py way to create an empty dataset).
        if self.events is None:
            group.create_dataset('events', data=h5py.Empty('f'))
        else:
            group.create_dataset('events',
                                 data=self.events,
                                 **self.__compression)

        if self.sample_weight is None:
            group.create_dataset('sample_weight', data=h5py.Empty('f'))
        else:
            group.create_dataset('sample_weight',
                                 data=self.sample_weight,
                                 **self.__compression)
Example #13
0
def initialize_HDF5(file=None):
    '''
    Create an HDF5 file populated with empty groups and datasets.

    When ``file`` is None the path is requested through ``get_save_file``;
    if that also yields None, nothing is written and None is returned.
    Otherwise the path of the written file is returned.
    '''
    if file is None:
        # Ask for a destination.
        file, _ = get_save_file('*.OpenRS')
        if file is None:
            return

    stamp_format = '%Y-%m-%d %H:%M:%S'

    # (group name, ((dataset name, dtype of the empty dataset), ...)),
    # listed in creation order.
    layout = (
        ("model_boundary", (("points", "f"),
                            ("vertices", "f"),
                            ("transform", "f"))),
        ("fiducials", (("enabled", "bool"),
                       ("points", "f"))),
        ("measurement_points", (("enabled", "bool"),
                                ("points", "f"))),
        ("sample", (("points", "f"),
                    ("vertices", "f"),
                    ("transform", "f"))),
        ("sgv", ()),
    )

    with h5py.File(file, 'w') as f:
        f.attrs['date_created'] = datetime.today().strftime(stamp_format)

        for group_name, datasets in layout:
            grp = f.create_group(group_name)
            for dataset_name, kind in datasets:
                grp.create_dataset(dataset_name, data=h5py.Empty(kind))

        f.attrs['date_modified'] = datetime.today().strftime(stamp_format)

    return file
Example #14
0
    def setUp(self):
        """Write the ``test_file.h5`` fixture with one dataset per shape."""
        super().setUp()

        # (dataset name, content) pairs, in creation order.
        fixtures = (
            ("oneD_dataset", ONE_D),
            ("twoD_dataset", TWO_D),
            ("threeD_dataset", THREE_D),
            ("complex", COMPLEX),
            ("scalar", SCALAR),
            ("empty", h5py.Empty(">f8")),
        )
        fixture_path = os.path.join(self.notebook_dir, "test_file.h5")
        with h5py.File(fixture_path, "w") as h5file:
            for dataset_name, content in fixtures:
                h5file[dataset_name] = content
 def _init_index(self, group):
     """Load the known indices from ``group`` and create missing defaults.

     The indices are read from the attributes of the dataset named
     ``self.INDEX_DATASET``, which is created empty when absent.
     """
     if self.INDEX_DATASET in group:
         index_store = group[self.INDEX_DATASET]
     else:
         index_store = group.create_dataset(self.INDEX_DATASET,
                                            data=h5py.Empty("f"))
     self.indices = dict(index_store.attrs.items())
     for default_name in self.DEFAULT_INDICES:
         if default_name in self.indices:
             continue
         self._create_index_dataset(group, index_store, default_name)
Example #16
0
 def save(self, h5group):
     r"""!
     Store the transform and all correlators in HDF5.
     \param h5group Base HDF5 group. Data is stored in subgroup `h5group/self.savePath`.
     """
     target = createH5Group(h5group, self.savePath)
     # A missing transform is recorded as an empty complex dataset.
     target["transform"] = (h5.Empty(dtype="complex")
                            if self.transform is None else self.transform)
     for label, values in self.correlators.items():
         target[label] = values
Example #17
0
    def setUp(self):
        """Write the ``test_file.h5`` fixture with one dataset per shape."""
        super().setUp()

        # (dataset name, content) pairs, in creation order.
        fixtures = (
            ('oneD_dataset', ONE_D),
            ('twoD_dataset', TWO_D),
            ('threeD_dataset', THREE_D),
            ('complex', COMPLEX),
            ('scalar', SCALAR),
            ('empty', h5py.Empty('>f8')),
        )
        fixture_path = os.path.join(self.notebook_dir, 'test_file.h5')
        with h5py.File(fixture_path, 'w') as h5file:
            for dataset_name, content in fixtures:
                h5file[dataset_name] = content
Example #18
0
def store_subdimensions(group, data, dtype, prefix=None):
    """Creates datasets in given group with data

    For the given dtype this writes ``<prefix>_empty`` (when h5py provides
    ``Empty``), ``<prefix>_d0`` (the first element of the data) and
    ``<prefix>_d1`` .. ``<prefix>_d3`` (the data reshaped to 2, 3 and 4
    dimensions). Failures to create a dataset are logged, not raised.

    :param h5py.Group group: Group where to add the datasets
    :param numpy.ndarray data: The data to use
    :param Union[numpy.dtype,str] dtype: dtype the data is converted to
    :param Union[str,None] prefix: String to use as datasets name prefix;
        defaults to the string form of the dtype
    """
    try:
        dtype = numpy.dtype(dtype)
    except TypeError as e:
        logger.error("Cannot create datasets for dtype: %s", str(dtype))
        logger.error(e)
        return

    if prefix is None:
        prefix = str(dtype)

    if hasattr(h5py, "Empty"):
        basename = prefix + "_empty"
        try:
            group[basename] = h5py.Empty(dtype=dtype)
        except (RuntimeError, ValueError) as e:
            logger.error("Error while creating %s in %s",
                         basename, str(group))
            logger.error(e)
    else:
        logger.warning("h5py.Empty not available")

    # Flatten a converted copy; the caller's array is left untouched.
    data = data.astype(dtype).reshape(-1)
    basename = prefix + "_d0"
    try:
        group[basename] = data[0]
    except RuntimeError as e:
        logger.error("Error while creating %s in %s", basename, str(group))
        logger.error(e)

    leading_dims = (10, 4, 4)
    for i in range(1, 4):
        # Fix the first ``i`` dimensions and let numpy infer the last one.
        data = data.reshape(leading_dims[:i] + (-1,))
        basename = prefix + "_d%d" % i
        try:
            group[basename] = data
        except RuntimeError as e:
            logger.error("Error while creating %s in %s",
                         basename, str(group))
            logger.error(e)
Example #19
0
 def setup(self,
           memoryAllowance,
           expectedNConfigs,
           file,
           maxBufferSize=None):
     """Run the parent setup, then store the transform under
     ``self.savePath`` in ``file``; returns the parent's result."""
     result = super().setup(memoryAllowance, expectedNConfigs, file,
                            maxBufferSize)
     with open_or_pass_file(file, None, "a") as h5f:
         # A missing transform is recorded as an empty complex dataset.
         h5f[self.savePath]["transform"] = (
             h5.Empty(dtype="complex")
             if self.transform is None else self.transform)
     return result
Example #20
0
def none_to_hdf5(key, kind, h5file):
    """Store a placeholder for a None or empty variable in hdf5.

    An empty float dataset is created under ``key`` and ``kind`` is
    recorded in its ``type`` attribute.

    Inputs:
      key: Dataset key
      kind: type of variable:
        * 'scalar': for a scalar, i.e. None
        * 'list': for a list, i.e. []
        * 'dict': for a dict, i.e. {}
      h5file: the hdf5 file
    """
    placeholder = h5py.Empty('f')
    h5file.create_dataset(key, data=placeholder)
    # Tag the placeholder with the kind of variable it stands for.
    h5file[key].attrs['type'] = kind
Example #21
0
def add_attributes(node):
    """Attach a set of sample attributes to ``node``.

    Creates integer and float attributes (scalar, 1D, 2D and empty),
    a scalar and an empty variable-length byte-string attribute, and a 2D
    variable-length ``str`` attribute.
    """
    # Integer
    node.attrs.create('scalar_int', np.int32(123), dtype='i4')
    node.attrs.create('1D_int', np.arange(3), dtype='i4')
    node.attrs.create('2D_int', np.arange(6).reshape(2, 3), dtype='i4')
    node.attrs.create('empty_int', h5py.Empty('i4'), dtype='i4')

    # Float
    node.attrs.create('scalar_float', np.float32(123.45), dtype='f4')
    node.attrs.create('1D_float', np.arange(3), dtype='f4')
    node.attrs.create('2D_float', np.arange(6).reshape(2, 3), dtype='f4')
    node.attrs.create('empty_float', h5py.Empty('f4'), dtype='f4')

    # String
    # ``np.str`` was only an alias of the builtin ``str`` and was removed
    # in NumPy 1.24; a plain string literal is the same value.
    data = "hello"
    ascii_dtype = h5py.special_dtype(vlen=bytes)
    node.attrs.create('scalar_string', data, dtype=ascii_dtype)
    node.attrs.create("empty_string", h5py.Empty(ascii_dtype))

    data = np.arange(6).reshape(2, 3).astype(bytes)
    utf8_dtype = h5py.special_dtype(vlen=str)
    node.attrs.create('2d_string', data=data, dtype=utf8_dtype)
Example #22
0
 def __call__(self, key, value):
     """Write ``value`` as dataset ``key`` in ``self.group``; return it.

     ``None`` is stored as an empty dataset, which needs h5py>=2.7.0.
     Compression is applied only to arrays with more than one element.
     """
     if value is not None:
         arr = _cpu._to_cpu(value)
         compression = self.compression if arr.size > 1 else None
     elif h5py.version.version_tuple < (2, 7, 0):
         raise RuntimeError(
             'h5py>=2.7.0 is required to serialize None.')
     else:
         # use Empty to represent None
         arr = h5py.Empty('f')
         compression = None
     self.group.create_dataset(key, data=arr, compression=compression)
     return value
Example #23
0
    def test_scalar_dataset(self):
        """Region references on a scalar dataset honour the selection."""
        ds = self.f.create_dataset("scalar", data=1.0, dtype='f4')
        space = h5py.h5s.create(h5py.h5s.SCALAR)

        def make_region_ref():
            # Reference to ``ds`` using the current selection of ``space``.
            return h5py.h5r.create(ds.id, b'.', h5py.h5r.DATASET_REGION,
                                   space)

        # Nothing selected: reading through the reference gives Empty.
        space.select_none()
        assert ds[make_region_ref()] == h5py.Empty(np.dtype('f4'))

        # Everything selected: same as reading the scalar directly.
        space.select_all()
        assert ds[make_region_ref()] == ds[()]
Example #24
0
def data_to_h5(data, grp, key, encoder='yaml'):
    """Store ``data`` under ``key`` in the HDF5 group ``grp``.

    The way the value was stored is recorded in the ``encoded`` attribute
    of the created dataset or group:

    * ``None``: empty dataset, ``'None'``
    * ``dict``: subgroup filled by ``dict_to_h5``, ``'dict'``
    * ``str``: byte-string dataset, ``'str'``
    * pandas ``Series`` / ``DataFrame``: subgroup holding data, index
      (and columns or name), ``'Series'`` / ``'DataFrame'``
    * anything else: stored directly (``'data'``); if h5py rejects it
      with ``TypeError`` it is serialised with ``encoder`` (``'yaml'`` or
      ``'json'``), and as a last resort converted to bytes
      (``'unknown'``).

    Failures in the fallback paths are printed, not raised.
    """
    # Exact type matches are kept on purpose: subclasses take the generic
    # path at the bottom, as before.
    if data is None:
        grp.create_dataset(key, data=h5py.Empty("f"))
        grp[key].attrs['encoded'] = 'None'

    elif type(data) is dict:
        new_grp = grp.create_group(key)
        new_grp.attrs['encoded'] = 'dict'
        dict_to_h5(data, new_grp)

    elif type(data) is str:
        # np.bytes_ is the same type as the np.string_ alias that was
        # removed in NumPy 2.0.
        grp.create_dataset(key, data=np.bytes_(data))
        grp[key].attrs['encoded'] = 'str'

    elif type(data) is pd.core.series.Series:
        new_grp = grp.create_group(key)
        new_grp.attrs['encoded'] = 'Series'
        new_grp.create_dataset('data', data=np.array(data))
        index_to_h5(data.index, 'index', new_grp)
        new_grp.create_dataset('name', data=np.bytes_(data.name))

    elif type(data) is pd.core.frame.DataFrame:
        new_grp = grp.create_group(key)
        new_grp.attrs['encoded'] = 'DataFrame'
        index_to_h5(data.index, 'index', new_grp)
        index_to_h5(data.columns, 'columns', new_grp)
        new_grp.create_dataset('data', data=np.array(data))

    else:
        try:
            grp.create_dataset(key, data=data)
            grp[key].attrs['encoded'] = 'data'

        except TypeError:
            print(f"TypeError, encoding {key} using {encoder}")
            try:
                if encoder == 'yaml':
                    string = yaml.dump(data)
                elif encoder == 'json':
                    string = json.dumps(data)
                else:
                    # Handled just below like any other encoding failure,
                    # but with a clear message.
                    raise ValueError(f"Unknown encoder: {encoder!r}")
                grp.create_dataset(key, data=np.bytes_(string))
                grp[key].attrs['encoded'] = encoder
            except Exception as e:
                print(e)
                try:
                    grp.create_dataset(key, data=np.bytes_(data))
                    grp[key].attrs['encoded'] = 'unknown'
                except Exception as e:
                    print(e)
                    print(f"Unable to dump {key}")
Example #25
0
def test_attrs_api(tmp_local_or_remote_netcdf):
    """Attributes on the file and on a variable round-trip through the
    mapping API, with an empty string attribute reading back as ``b""``."""
    target = tmp_local_or_remote_netcdf

    with h5netcdf.File(target, "w") as ds:
        ds.attrs["conventions"] = "CF"
        ds.attrs["empty_string"] = h5py.Empty(dtype=np.dtype("|S1"))
        ds.dimensions["x"] = 1
        variable = ds.create_variable("x", ("x",), "i4")
        variable.attrs.update({"units": "meters", "foo": "bar"})
    assert ds._closed

    expected_file_attrs = {"conventions": "CF", "empty_string": b""}
    expected_var_attrs = {"units": "meters", "foo": "bar"}
    with h5netcdf.File(target, "r") as ds:
        # File-level attributes
        assert len(ds.attrs) == 2
        assert dict(ds.attrs) == expected_file_attrs
        assert list(ds.attrs) == ["conventions", "empty_string"]
        # Variable-level attributes
        assert dict(ds["x"].attrs) == expected_var_attrs
        assert len(ds["x"].attrs) == 2
        assert sorted(ds["x"].attrs) == ["foo", "units"]
Example #26
0
 def __call__(self, key, value):
     """Write ``value`` as dataset ``key`` in ``self.group``.

     ``cuda.ndarray`` values are copied to the CPU first, and ``None`` is
     stored as an empty dataset (needs h5py>=2.7.0). Compression is
     applied only to arrays with more than one element. The value is
     returned as it was passed in.
     """
     original = value
     if isinstance(value, cuda.ndarray):
         value = cuda.to_cpu(value)

     if value is not None:
         arr = numpy.asarray(value)
         compression = self.compression if arr.size > 1 else None
     elif h5py.version.version_tuple < (2, 7, 0):
         raise RuntimeError(
             'h5py>=2.7.0 is required to serialize None.')
     else:
         # use Empty to represent None
         arr = h5py.Empty('f')
         compression = None
     self.group.create_dataset(key, data=arr, compression=compression)
     return original
Example #27
0
    def setUp(self):
        """Write a temporary HDF5 file and open a deserializer on it.

        The file holds a group ``x``, a float32 array ``y``, a scalar
        ``z`` and, when h5py supports it, an empty dataset ``w``.
        """
        self.data = numpy.random.uniform(-1, 1, (2, 3)).astype(numpy.float32)

        descriptor, self.temp_file_path = tempfile.mkstemp()
        os.close(descriptor)

        # h5py.Empty is introduced from 2.7.0
        has_empty = h5py.version.version_tuple >= (2, 7, 0)
        with h5py.File(self.temp_file_path, 'w') as f:
            f.require_group('x')
            f.create_dataset('y', data=self.data)
            f.create_dataset('z', data=numpy.asarray(10))
            if has_empty:
                f.create_dataset('w', data=h5py.Empty('f'))

        self.hdf5file = h5py.File(self.temp_file_path, 'r')
        self.deserializer = hdf5.HDF5Deserializer(self.hdf5file)
Example #28
0
    def _check_data(data_list):
        """
        Check if the data to be stores is in a good format. If not adapt it.
        """
        if data_list is None:
            raise FileStructureException("Could not store null data")

        if not (isinstance(data_list, list) or isinstance(data_list, numpy.ndarray)):
            raise FileStructureException("Invalid data type. Could not store data of type:" + str(type(data_list)))

        data_to_store = data_list
        if isinstance(data_to_store, list):
            data_to_store = numpy.array(data_list)
        if data_to_store.shape == ():
            data_to_store = hdf5.Empty("f")
        return data_to_store
Example #29
0
def recursive_save_to_h5(h5_file, path, item):
    """Store ``item`` at ``path`` in ``h5_file``, recursing into containers.

    Dicts and lists become groups whose children are named after the keys
    or the list indices; ``None`` becomes an empty dataset; anything else
    is stored as a dataset. The Python type name of ``item`` is recorded
    in the ``type`` attribute of the created object.

    ``path`` is used as a prefix for child names, so for containers it is
    expected to end with ``/``.
    """
    if isinstance(item, dict):
        # Create the group up front so that an empty dict still has an
        # object to carry the ``type`` attribute.
        h5_file.require_group(path)
        for key, value in item.items():
            recursive_save_to_h5(h5_file, f"{path}{key}/", value)
    elif isinstance(item, list):
        h5_file.require_group(path)
        for index, value in enumerate(item):
            recursive_save_to_h5(h5_file, f"{path}{index}/", value)
    elif item is None:
        h5_file[path] = h5py.Empty("f")
    else:
        h5_file[path] = item

    h5_file[path].attrs["type"] = type(item).__name__
Example #30
0
def recursive_load_from_h5(h5_file, path):
    """Rebuild the Python object stored at ``path`` in ``h5_file``.

    The ``type`` attribute of the object selects the result: ``dict`` and
    ``list`` groups are rebuilt recursively, an empty dataset is returned
    as ``None``, and any other dataset is returned as its stored value.

    Children of a ``list`` group are expected to be named by their integer
    index; they are read in numeric order.
    """
    node = h5_file[path]
    stored_type = node.attrs["type"]

    if stored_type == dict.__name__:
        return {
            key: recursive_load_from_h5(h5_file, f"{path}/{key}")
            for key in node
        }

    if stored_type == list.__name__:
        # Group members iterate in name order ("10" before "2"), so sort
        # numerically to restore the original list order.
        return [
            recursive_load_from_h5(h5_file, f"{path}/{key}")
            for key in sorted(node, key=int)
        ]

    # Reading an empty dataset yields an h5py.Empty instance. Test its
    # type: an identity check against a new Empty object never matches.
    value = node[()]
    if isinstance(value, h5py.Empty):
        return None
    return value