def add_classifier(self, hfile):
    """Create classifier group and add data."""
    cgroup = hfile.create_group('/classifier')
    cgroup.create_dataset('weights', data=self.weights, **self.__compression)
    cgroup.create_dataset('intercept', data=[self.classifier.intercept_])

    info_group = cgroup.create_group('info')
    addstring = partial(self.addstring, info_group)
    addstring('classname', self.classname)
    addstring('subject', self.subject)

    params = json.dumps(self.params)
    addstring('params', params, dtype='|S{:d}'.format(len(params)))

    if self.roc is None:
        info_group.create_dataset('roc', dtype=h5py.Empty('<f8'))
    else:
        info_group.create_dataset('roc', data=self.roc, **self.__compression)

    if self.auc is None:
        info_group.create_dataset('auc', dtype=h5py.Empty('<f8'))
    else:
        info_group.create_dataset('auc', data=[self.auc], **self.__compression)

def test_empty(self):
    # https://github.com/h5py/h5py/issues/1540
    """ Create attribute with h5py.Empty value """
    self.f.attrs.create('empty', h5py.Empty('f'))
    self.assertEqual(self.f.attrs['empty'], h5py.Empty('f'))

    self.f.attrs.create('empty', h5py.Empty(None))
    self.assertEqual(self.f.attrs['empty'], h5py.Empty(None))

def _save_run_results_hdf(outfile, results):
    # results: model_id timestamp class_labels (bin + roi_numbers)
    #          input_images output_classes output_scores
    with h5.File(outfile, 'w') as f:
        meta = f.create_dataset('metadata', data=h5.Empty('f'))
        meta.attrs['version'] = results['version']
        meta.attrs['model_id'] = results['model_id']
        meta.attrs['timestamp'] = results['timestamp']
        f.create_dataset('output_classes', data=results['output_classes'],
                         compression='gzip', dtype='float16')
        f.create_dataset('output_scores', data=results['output_scores'],
                         compression='gzip', dtype='float16')
        f.create_dataset('class_labels', data=np.string_(results['class_labels']),
                         compression='gzip', dtype=h5.string_dtype())
        if results['bin_id']:
            meta.attrs['bin_id'] = results['bin_id']
            f.create_dataset('roi_numbers', data=results['roi_numbers'],
                             compression='gzip', dtype='uint16')
        else:
            f.create_dataset('input_images', data=np.string_(results['input_images']),
                             compression='gzip', dtype=h5.string_dtype())

def _write_to_group(group, dictionary):
    """ writes a dictionary to a hdf5 group, which can recurse"""
    for key in dictionary.keys():
        value = dictionary[key]
        if isinstance(value, str):
            # needs another conversion to string to catch weird subtypes
            # like numpy.str_
            group.attrs[key] = str(value)
        elif isinstance(value, np.ndarray):
            if str(value.dtype)[:2] == '<U':
                group[key] = value.astype('S')
            else:
                group[key] = value
        elif isinstance(value, list):
            _write_list(group, key, value)
        elif isinstance(value, dict):
            subgroup = group.create_group(key)
            _write_to_group(subgroup, value)
        elif value is None:
            group[key] = h5py.Empty("f")
        elif isinstance(value, Iterable):
            if isinstance(value[0], str):
                group.attrs[key] = value
            else:
                group[key] = value

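# A minimal usage sketch for _write_to_group, not from the source: it assumes
# the module's own imports (h5py, numpy as np, Iterable) and its _write_list
# helper. Strings land in attributes, arrays and None in datasets, dicts recurse.
def _demo_write_to_group():
    payload = {
        "title": "run 42",         # str -> group attribute
        "samples": np.arange(4),   # ndarray -> dataset
        "missing": None,           # None -> null (h5py.Empty) dataset
        "nested": {"flag": "on"},  # dict -> subgroup, recursively
    }
    with h5py.File("demo.h5", "w") as f:
        _write_to_group(f, payload)
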
def setUp(self):
    super().setUp()
    with h5py.File(os.path.join(self.notebook_dir, "test_file.h5"), "w") as h5file:
        # Empty group
        h5file.create_group("empty_group")

        # Group with 2 children
        grp = h5file.create_group("group_with_children")
        # A simple dataset
        grp["dataset_1"] = np.random.random((2, 3, 4))
        # and a group with attributes
        attr_grp = grp.create_group("group_with_attrs")
        attr_grp.attrs["array_attr"] = np.arange(0, 1, 0.1, dtype=">f4")
        attr_grp.attrs["bool_attr"] = True
        attr_grp.attrs["complex_attr"] = np.complex64(1 + 2j)
        attr_grp.attrs["number_attr"] = np.int32(5676)
        attr_grp.attrs["string_attr"] = "I am a group"

        # Scalar dataset
        h5file["scalar"] = 56

        # Empty dataset
        h5file["empty"] = h5py.Empty(">f8")

def createHdfTestData():
    """ Create files with the python h5py module. """
    import h5py

    # Empty datasets
    # See https://docs.h5py.org/en/latest/high/dataset.html#creating-and-reading-empty-or-null-datasets-and-attributes
    # and https://github.com/h5py/h5py/issues/279#issuecomment-15313062 for a possible use case
    with h5py.File('empty.h5', 'w') as h5Root:
        ds = h5Root.create_dataset("emptyDataset", data=h5py.Empty("f"))
        ds.attrs['Description'] = 'An empty dataset'

        # Broken in 2.10 but fixed in 3.0, https://github.com/h5py/h5py/issues/1540
        ds = h5Root.create_dataset("hasEmptyAttribute", data=np.arange(5))
        ds.attrs['Description'] = 'A regular dataset with an empty attribute'
        ds.attrs["emptyAttr"] = h5py.Empty(np.float32)

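# Hedged companion sketch (not in the source): how the empties created above
# read back with h5py 3.x. Per the h5py docs, a null dataset has shape None,
# and reading it with ds[()] (or reading an empty attribute) yields an
# h5py.Empty instance.
def readHdfTestData():
    import h5py
    with h5py.File('empty.h5', 'r') as h5Root:
        ds = h5Root["emptyDataset"]
        assert ds.shape is None                # null dataspace has no shape
        assert isinstance(ds[()], h5py.Empty)  # reads back as Empty
        attr = h5Root["hasEmptyAttribute"].attrs["emptyAttr"]
        assert isinstance(attr, h5py.Empty)
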
def _save_validation_results_hdf(self, outfile, results):
    attrib_data = ['model_id', 'timestamp']
    attrib_data += 'f1_weighted recall_weighted precision_weighted f1_macro recall_macro precision_macro'.split()
    int_data = ['input_classes', 'output_classes'] + \
        'counts_perclass val_counts_perclass train_counts_perclass'.split()
    int_data.extend(['classes_by_' + stat for stat in 'f1 recall precision count'.split()])
    string_data = ['class_labels', 'image_fullpaths', 'image_basenames']

    with h5.File(outfile, 'w') as f:
        meta = f.create_dataset('metadata', data=h5.Empty('f'))
        for series in results:
            if series in attrib_data:
                meta.attrs[series] = results[series]
            elif series in string_data:
                f.create_dataset(series, data=np.string_(results[series]),
                                 compression='gzip', dtype=h5.string_dtype())
            elif series in int_data:
                f.create_dataset(series, data=results[series],
                                 compression='gzip', dtype='int16')
            elif isinstance(results[series], np.ndarray):
                f.create_dataset(series, data=results[series],
                                 compression='gzip', dtype='float16')
            else:
                raise UserWarning(
                    'hdf results: WE MISSED THIS ONE: {}'.format(series))

def setUp(self):
    TestCase.setUp(self)
    filename = self.getFileName("dataset_testempty")
    print("filename:", filename)
    self.f = h5py.File(filename, 'w')
    self.dset = self.f.create_dataset('x', dtype='S10')
    self.empty_obj = h5py.Empty(np.dtype("S10"))

def create_attr(self, name: str, value: Optional[Any], log: Optional[Logger] = None):
    """Adds an attribute to the current object.

    WARNING: Any existing attribute will be overwritten!

    This method will coerce value to a special 'Empty' type used by HDF5 if
    the value provided is zero-length or None. For more on Attributes and
    Empty types, see [1, 2].

    [1] - https://docs.h5py.org/en/stable/high/attr.html#attributes
    [2] - https://docs.h5py.org/en/stable/high/dataset.html?highlight=Empty#creating-and-reading-empty-or-null-datasets-and-attributes

    Parameters
    ----------
    name : str
        Name of the attribute.
    value : Optional[Any]
        Value of the attribute. This method will coerce this value to a
        special Empty object if it's zero-length or None [2].
    """
    if value is None or value == "" or (hasattr(value, "__len__") and len(value) < 1):
        empty = h5py.Empty(dtype=np.uint8)
        self.get_attrs().create(name, empty)
    elif isinstance(value, HDF5IsAttr):
        attr_value = value.as_attr()
        # store the converted value, not the wrapper object itself
        self.get_attrs().create(name, attr_value, dtype=hdf5_dtype(attr_value))
    else:
        self.get_attrs().create(name, value, dtype=hdf5_dtype(value))

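# Hedged usage sketch (`node` is a hypothetical instance of the class above):
# values that are None or zero-length are coerced to an h5py.Empty attribute.
node.create_attr("comment", "processed")  # stored via hdf5_dtype(value)
node.create_attr("optional", None)        # coerced to h5py.Empty(np.uint8)
node.create_attr("tags", [])              # zero-length -> Empty as well
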
def setUp(self):
    BaseAttrs.setUp(self)
    sid = h5s.create(h5s.NULL)
    tid = h5t.C_S1.copy()
    tid.set_size(10)
    aid = h5a.create(self.f.id, b'x', tid, sid)
    self.empty_obj = h5py.Empty(np.dtype("S10"))

def setUp(self):
    TestCase.setUp(self)
    sid = h5py.h5s.create(h5py.h5s.NULL)
    tid = h5py.h5t.C_S1.copy()
    tid.set_size(10)
    dsid = h5py.h5d.create(self.f.id, b'x', tid, sid)
    self.dset = h5py.Dataset(dsid)
    self.empty_obj = h5py.Empty(np.dtype("S10"))

def add_training(self, hfile):
    """Add training data."""
    group = hfile.create_group('/classifier/training')

    if self.events is None:
        group.create_dataset('events', dtype=h5py.Empty('f'))
    else:
        group.create_dataset('events', data=self.events, **self.__compression)

    if self.sample_weight is None:
        group.create_dataset('sample_weight', dtype=h5py.Empty('f'))
    else:
        group.create_dataset('sample_weight', data=self.sample_weight,
                             **self.__compression)

def initialize_HDF5(file=None):
    '''
    Create an HDF5 file with relevant empty structures.
    '''
    if file is None:
        # launch get_save_file
        file, _ = get_save_file('*.OpenRS')
        if file is None:
            return

    with h5py.File(file, 'w') as f:
        f.attrs['date_created'] = datetime.today().strftime('%Y-%m-%d %H:%M:%S')
        # f.attrs['version'] = __version__

        # create model group
        # model = f.create_group("model_data")
        boundary = f.create_group("model_boundary")
        boundary.create_dataset("points", data=h5py.Empty("f"))
        boundary.create_dataset("vertices", data=h5py.Empty("f"))
        boundary.create_dataset("transform", data=h5py.Empty("f"))

        # create fiducial group
        fid = f.create_group("fiducials")
        fid.create_dataset("enabled", data=h5py.Empty("bool"))
        fid.create_dataset("points", data=h5py.Empty("f"))

        # create measurement group
        meas = f.create_group("measurement_points")
        meas.create_dataset("enabled", data=h5py.Empty("bool"))
        meas.create_dataset("points", data=h5py.Empty("f"))

        # create the sample group
        sample = f.create_group("sample")
        sample.create_dataset("points", data=h5py.Empty("f"))
        sample.create_dataset("vertices", data=h5py.Empty("f"))
        sample.create_dataset("transform", data=h5py.Empty("f"))

        # create sgv group
        sgv = f.create_group("sgv")

        f.attrs['date_modified'] = datetime.today().strftime('%Y-%m-%d %H:%M:%S')

    return file

def setUp(self):
    super().setUp()
    with h5py.File(os.path.join(self.notebook_dir, "test_file.h5"), "w") as h5file:
        h5file["oneD_dataset"] = ONE_D
        h5file["twoD_dataset"] = TWO_D
        h5file["threeD_dataset"] = THREE_D
        h5file["complex"] = COMPLEX
        h5file["scalar"] = SCALAR
        h5file["empty"] = h5py.Empty(">f8")

def _init_index(self, group):
    if self.INDEX_DATASET not in group:
        indices_dataset = group.create_dataset(self.INDEX_DATASET, data=h5py.Empty("f"))
    else:
        indices_dataset = group[self.INDEX_DATASET]

    self.indices = dict(indices_dataset.attrs.items())
    for index in self.DEFAULT_INDICES:
        if index not in self.indices:
            self._create_index_dataset(group, indices_dataset, index)

def save(self, h5group):
    r"""!
    \param h5group Base HDF5 group. Data is stored in subgroup `h5group/self.savePath`.
    """
    subGroup = createH5Group(h5group, self.savePath)
    if self.transform is None:
        subGroup["transform"] = h5.Empty(dtype="complex")
    else:
        subGroup["transform"] = self.transform
    for name, correlator in self.correlators.items():
        subGroup[name] = correlator

def setUp(self):
    super().setUp()
    with h5py.File(os.path.join(self.notebook_dir, 'test_file.h5'), 'w') as h5file:
        h5file['oneD_dataset'] = ONE_D
        h5file['twoD_dataset'] = TWO_D
        h5file['threeD_dataset'] = THREE_D
        h5file['complex'] = COMPLEX
        h5file['scalar'] = SCALAR
        h5file['empty'] = h5py.Empty('>f8')

def store_subdimensions(group, data, dtype, prefix=None):
    """Creates datasets in given group with data

    :param h5py.Group group: Group where to add the datasets
    :param numpy.ndarray data: The data to use
    :param Union[numpy.dtype,str] dtype:
    :param Union[str,None] prefix: String to use as datasets name prefix
    """
    try:
        dtype = numpy.dtype(dtype)
    except TypeError as e:
        logger.error("Cannot create datasets for dtype: %s", str(dtype))
        logger.error(e)
        return

    if prefix is None:
        prefix = str(dtype)

    if hasattr(h5py, "Empty"):
        basename = prefix + "_empty"
        try:
            group[basename] = h5py.Empty(dtype=numpy.dtype(dtype))
        except (RuntimeError, ValueError) as e:
            logger.error("Error while creating %s in %s" % (basename, str(group)))
            logger.error(e)
    else:
        logger.warning("h5py.Empty not available")

    data = data.astype(dtype)
    data.shape = -1
    basename = prefix + "_d0"
    try:
        group[basename] = data[0]
    except RuntimeError as e:
        logger.error("Error while creating %s in %s" % (basename, str(group)))
        logger.error(e)

    shapes = [10, 4, 4, 4]
    for i in range(1, 4):
        shape = shapes[:i]
        shape.append(-1)
        shape.reverse()  # reverse in place; a bare `reversed(shape)` would discard its result
        shape = tuple(shape)
        data.shape = shape
        basename = prefix + "_d%d" % i
        try:
            group[basename] = data
        except RuntimeError as e:
            logger.error("Error while creating %s in %s" % (basename, str(group)))
            logger.error(e)

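# Hedged usage sketch for store_subdimensions (hypothetical file and group
# names; assumes the module's numpy/h5py imports and logger). 640 elements
# divide evenly into every reversed shape the loop builds: (-1, 10),
# (-1, 4, 10) and (-1, 4, 4, 10).
with h5py.File("types.h5", "w") as f:
    grp = f.create_group("demo")
    store_subdimensions(grp, numpy.arange(640), "float32")
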
def setup(self, memoryAllowance, expectedNConfigs, file, maxBufferSize=None):
    res = super().setup(memoryAllowance, expectedNConfigs, file, maxBufferSize)
    with open_or_pass_file(file, None, "a") as h5f:
        if self.transform is None:
            h5f[self.savePath]["transform"] = h5.Empty(dtype="complex")
        else:
            h5f[self.savePath]["transform"] = self.transform
    return res

def none_to_hdf5(key, kind, h5file):
    """Save a None variable, empty list, or empty dictionary to hdf5

    Inputs:
        key: Dataset key
        kind: type of variable:
            * 'scalar': for a scalar, i.e. None
            * 'list': for a list, i.e. []
            * 'dict': for a dict, i.e. {}
        h5file: the hdf5 file
    """
    h5file.create_dataset(key, data=h5py.Empty('f'))
    h5file[key].attrs['type'] = kind

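# Hedged counterpart sketch (not in the source): reading the marker back.
# The dataset itself is a null h5py.Empty; the 'type' attribute written by
# none_to_hdf5 selects which empty Python value to reconstruct.
def none_from_hdf5(key, h5file):
    kind = h5file[key].attrs['type']
    return {'scalar': None, 'list': [], 'dict': {}}[kind]
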
def add_attributes(node):
    # Integer
    node.attrs.create('scalar_int', np.int32(123), dtype='i4')
    node.attrs.create('1D_int', np.arange(3), dtype='i4')
    node.attrs.create('2D_int', np.arange(6).reshape(2, 3), dtype='i4')
    node.attrs.create('empty_int', h5py.Empty('i4'), dtype='i4')

    # Float
    node.attrs.create('scalar_float', np.float32(123.45), dtype='f4')
    node.attrs.create('1D_float', np.arange(3), dtype='f4')
    node.attrs.create('2D_float', np.arange(6).reshape(2, 3), dtype='f4')
    node.attrs.create('empty_float', h5py.Empty('f4'), dtype='f4')

    # String
    data = str("hello")  # np.str was an alias of the builtin str, removed in NumPy >= 1.24
    ascii = h5py.special_dtype(vlen=bytes)
    node.attrs.create('scalar_string', data, dtype=ascii)
    node.attrs.create("empty_string", h5py.Empty(ascii))

    data = np.arange(6).reshape(2, 3).astype(bytes)
    utf8 = h5py.special_dtype(vlen=str)
    node.attrs.create('2d_string', data=data, dtype=utf8)

def __call__(self, key, value):
    if value is None:
        # use Empty to represent None
        if h5py.version.version_tuple < (2, 7, 0):
            raise RuntimeError('h5py>=2.7.0 is required to serialize None.')
        arr = h5py.Empty('f')
        compression = None
    else:
        arr = _cpu._to_cpu(value)
        compression = None if arr.size <= 1 else self.compression
    self.group.create_dataset(key, data=arr, compression=compression)
    return value

def test_scalar_dataset(self):
    ds = self.f.create_dataset("scalar", data=1.0, dtype='f4')
    sid = h5py.h5s.create(h5py.h5s.SCALAR)

    # Deselected
    sid.select_none()
    ref = h5py.h5r.create(ds.id, b'.', h5py.h5r.DATASET_REGION, sid)
    assert ds[ref] == h5py.Empty(np.dtype('f4'))

    # Selected
    sid.select_all()
    ref = h5py.h5r.create(ds.id, b'.', h5py.h5r.DATASET_REGION, sid)
    assert ds[ref] == ds[()]

def data_to_h5(data, grp, key, encoder='yaml'):
    if data is None:
        grp.create_dataset(key, data=h5py.Empty("f"))
        grp[key].attrs['encoded'] = 'None'
    elif type(data) == dict:
        new_grp = grp.create_group(key)
        new_grp.attrs['encoded'] = 'dict'
        dict_to_h5(data, new_grp)
    elif type(data) == str:
        grp.create_dataset(key, data=np.string_(data))
        grp[key].attrs['encoded'] = 'str'
    elif type(data) == pd.core.series.Series:
        new_grp = grp.create_group(key)
        new_grp.attrs['encoded'] = 'Series'
        new_grp.create_dataset('data', data=np.array(data))
        index_to_h5(data.index, 'index', new_grp)
        new_grp.create_dataset('name', data=np.string_(data.name))
    elif type(data) == pd.core.frame.DataFrame:
        new_grp = grp.create_group(key)
        new_grp.attrs['encoded'] = 'DataFrame'
        index_to_h5(data.index, 'index', new_grp)
        index_to_h5(data.columns, 'columns', new_grp)
        new_grp.create_dataset('data', data=np.array(data))
    else:
        try:
            grp.create_dataset(key, data=data)
            grp[key].attrs['encoded'] = 'data'
        except TypeError:
            print(f"TypeError, encoding {key} using {encoder}")
            try:
                if encoder == 'yaml':
                    string = np.string_(yaml.dump(data))
                elif encoder == 'json':
                    string = np.string_(json.dumps(data))
                grp.create_dataset(key, data=np.string_(string))
                grp[key].attrs['encoded'] = encoder
            except Exception as e:
                print(e)
                try:
                    grp.create_dataset(key, data=np.string_(data))
                    grp[key].attrs['encoded'] = 'unknown'
                except Exception as e:
                    print(e)
                    print(f"Unable to dump {key}")

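# Hedged usage sketch for data_to_h5 (assumes the module's dict_to_h5 and
# index_to_h5 helpers and an open h5py.File `f`). Every entry records how it
# was stored in an 'encoded' attribute, so a reader can dispatch on it.
data_to_h5(None, f, "missing")   # null dataset, attrs['encoded'] == 'None'
data_to_h5("hello", f, "note")   # byte string,  attrs['encoded'] == 'str'
data_to_h5({"a": 1}, f, "cfg")   # subgroup,     attrs['encoded'] == 'dict'
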
def test_attrs_api(tmp_local_or_remote_netcdf):
    with h5netcdf.File(tmp_local_or_remote_netcdf, "w") as ds:
        ds.attrs["conventions"] = "CF"
        ds.attrs["empty_string"] = h5py.Empty(dtype=np.dtype("|S1"))
        ds.dimensions["x"] = 1
        v = ds.create_variable("x", ("x",), "i4")
        v.attrs.update({"units": "meters", "foo": "bar"})

    assert ds._closed

    with h5netcdf.File(tmp_local_or_remote_netcdf, "r") as ds:
        assert len(ds.attrs) == 2
        assert dict(ds.attrs) == {"conventions": "CF", "empty_string": b""}
        assert list(ds.attrs) == ["conventions", "empty_string"]
        assert dict(ds["x"].attrs) == {"units": "meters", "foo": "bar"}
        assert len(ds["x"].attrs) == 2
        assert sorted(ds["x"].attrs) == ["foo", "units"]

def __call__(self, key, value):
    ret = value
    if isinstance(value, cuda.ndarray):
        value = cuda.to_cpu(value)
    if value is None:
        # use Empty to represent None
        if h5py.version.version_tuple < (2, 7, 0):
            raise RuntimeError('h5py>=2.7.0 is required to serialize None.')
        arr = h5py.Empty('f')
        compression = None
    else:
        arr = numpy.asarray(value)
        compression = None if arr.size <= 1 else self.compression
    self.group.create_dataset(key, data=arr, compression=compression)
    return ret

def setUp(self):
    self.data = numpy.random.uniform(-1, 1, (2, 3)).astype(numpy.float32)
    fd, path = tempfile.mkstemp()
    os.close(fd)
    self.temp_file_path = path
    with h5py.File(path, 'w') as f:
        f.require_group('x')
        f.create_dataset('y', data=self.data)
        f.create_dataset('z', data=numpy.asarray(10))
        # h5py.Empty was introduced in 2.7.0
        if h5py.version.version_tuple >= (2, 7, 0):
            f.create_dataset('w', data=h5py.Empty('f'))

    self.hdf5file = h5py.File(path, 'r')
    self.deserializer = hdf5.HDF5Deserializer(self.hdf5file)

def _check_data(data_list):
    """ Check if the data to be stored is in a good format. If not, adapt it. """
    if data_list is None:
        raise FileStructureException("Could not store null data")

    if not (isinstance(data_list, list) or isinstance(data_list, numpy.ndarray)):
        raise FileStructureException("Invalid data type. Could not store data of type:" +
                                     str(type(data_list)))

    data_to_store = data_list
    if isinstance(data_to_store, list):
        data_to_store = numpy.array(data_list)
    if data_to_store.shape == ():
        data_to_store = hdf5.Empty("f")
    return data_to_store

def recursive_save_to_h5(h5_file, path, item):
    if isinstance(item, dict):
        for key, value in item.items():
            recursive_save_to_h5(h5_file, f"{path}{key}/", value)
    elif isinstance(item, list):
        if len(item) > 0:
            for key, value in enumerate(item):
                recursive_save_to_h5(h5_file, f"{path}{key}/", value)
        else:
            h5_file.create_group(f"{path}/")
    elif item is None:
        h5_file[path] = h5py.Empty("f")
    else:
        h5_file[path] = item
    h5_file[path].attrs["type"] = type(item).__name__

def recursive_load_from_h5(h5_file, path):
    if h5_file[path].attrs["type"] == dict.__name__:
        return_dict = {}
        for key, value in h5_file[path].items():
            return_dict[key] = recursive_load_from_h5(h5_file, f"{path}/{key}")
        return return_dict
    elif h5_file[path].attrs["type"] == list.__name__:
        return_list = []
        for key, value in h5_file[path].items():
            return_list.append(recursive_load_from_h5(h5_file, f"{path}/{key}"))
        return return_list
    elif h5_file[path].shape is None:
        # an `is h5py.Empty("f")` identity check is always False; empty
        # (null) datasets are detected by their shape being None
        return None
    else:
        return h5_file[path][()]

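# Hedged round-trip sketch for the two recursive helpers above, assuming they
# behave as written (None survives as a null dataset tagged 'NoneType'):
with h5py.File("state.h5", "w") as f:
    recursive_save_to_h5(f, "/", {"a": 1, "b": [2, 3], "c": None})
with h5py.File("state.h5", "r") as f:
    state = recursive_load_from_h5(f, "/")  # {'a': 1, 'b': [2, 3], 'c': None}
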