def test_softlinks(self):
    """Broken soft and external links are members, but paths through them are not."""
    self.f.create_group('grp')
    self.f['/grp/soft'] = h5py.SoftLink('/mongoose')
    self.f['/grp/external'] = h5py.ExternalLink('mongoose.hdf5', '/mongoose')
    # Each dangling link is itself contained; nothing can be reached through it.
    for link_path in ('/grp/soft', '/grp/external'):
        self.assertIn(link_path, self.f)
        self.assertNotIn(link_path + '/something', self.f)
def end_of_run():
    """Finalize the CXI output file.

    Closes the CXI writer and, on the main MPI worker only, reopens the file
    in append mode to add the fields (source, detector geometry, soft links)
    that ptychography tools expect, per http://www.cxidb.org/cxi.html.

    NOTE(review): relies on module-level globals (W, filename, photon_energy_J,
    det_distance, det_pixelsize, corner_position, sim) being set elsewhere in
    this module — confirm against the surrounding file.
    """
    # Close CXI file
    W.close()
    if ipc.mpi.is_main_worker():
        # Reopen CXI file to append with more information necessary
        # for ptychography datasets, see http://www.cxidb.org/cxi.html
        f = h5py.File(filename, "r+")
        # Already existing fields
        # NOTE(review): sample_1 and geometry_1 are looked up but never used below.
        entry_1 = f['entry_1']
        instrument_1 = f['entry_1']['instrument_1']
        detector_1 = f['entry_1']['instrument_1']['detector_1']
        sample_1 = f['entry_1']['sample_1']
        geometry_1 = f['entry_1']['sample_1']['geometry_1']
        # Add new data fields
        f.create_dataset("cxi_version", data=140)
        source_1 = instrument_1.create_group("source_1")
        source_1.create_dataset("energy", data=photon_energy_J)  # in J
        detector_1.create_dataset("distance", data=det_distance)
        detector_1.create_dataset("x_pixel_size", data=det_pixelsize)
        detector_1.create_dataset("y_pixel_size", data=det_pixelsize)
        # Detector translation is shared with the sample geometry via a soft link.
        detector_1["translation"] = h5py.SoftLink(
            '/entry_1/sample_1/geometry_1/translation')
        detector_1.create_dataset("corner_position", data=corner_position)
        data_1 = entry_1.create_group("data_1")
        data_1["data"] = h5py.SoftLink('/entry_1/instrument_1/detector_1/data')
        data_1["translation"] = h5py.SoftLink(
            '/entry_1/sample_1/geometry_1/translation')
        # These are optional data that should be provided (if known)
        # ----------------------------------------------------------
        source_1.create_dataset("illumination", data=sim.get_illumination())
        #detector_1.create_dataset("Fillumination_mask", data=illumination_intensities_mask)
        #detector_1.create_dataset("solution", data=sim.obj)
        #detector_1.create_dataset("initial_image",data=initial_image)
        # Close CXI file and exit
        f.close()
def createResource(cls, directory):
    """Build the base.h5 test resource in *directory* and return its path.

    Also writes two companion files next to it: an external HDF5 file
    (link/VDS targets) and a raw binary file (external dataset storage).
    """
    filename = os.path.join(directory, "base.h5")
    ext_h5_name = os.path.join(directory, "base__external.h5")
    ext_dat_name = os.path.join(directory, "base__external.dat")

    # Companion HDF5 file: targets for external links and virtual sources.
    with h5py.File(ext_h5_name, mode="w") as ext:
        ext["target/dataset"] = 50
        ext["target/link"] = h5py.SoftLink("/target/dataset")
        ext["/ext/vds0"] = [0, 1]
        ext["/ext/vds1"] = [2, 3]

    # Companion raw file: backing store for the external dataset.
    numpy.array([0, 1, 10, 10, 2, 3]).tofile(ext_dat_name)

    with h5py.File(filename, mode="w") as h5:
        h5["group/dataset"] = 50

        # Working soft/external links of every flavour.
        h5["link/soft_link"] = h5py.SoftLink("/group/dataset")
        h5["link/soft_link_to_group"] = h5py.SoftLink("/group")
        h5["link/soft_link_to_link"] = h5py.SoftLink("/link/soft_link")
        h5["link/soft_link_to_file"] = h5py.SoftLink("/")
        h5["group/soft_link_relative"] = h5py.SoftLink("dataset")
        h5["link/external_link"] = h5py.ExternalLink(ext_h5_name,
                                                     "/target/dataset")
        h5["link/external_link_to_link"] = h5py.ExternalLink(
            ext_h5_name, "/target/link")

        # Deliberately broken links.
        h5["broken_link/external_broken_file"] = h5py.ExternalLink(
            ext_h5_name + "_not_exists", "/target/link")
        h5["broken_link/external_broken_link"] = h5py.ExternalLink(
            ext_h5_name, "/target/not_exists")
        h5["broken_link/soft_broken_link"] = h5py.SoftLink("/group/not_exists")
        h5["broken_link/soft_link_to_broken_link"] = h5py.SoftLink(
            "/group/not_exists")

        # Virtual dataset stitched from the two /ext sources (relative name).
        layout = h5py.VirtualLayout((2, 2), dtype=int)
        for row, src_path in enumerate(("/ext/vds0", "/ext/vds1")):
            layout[row] = h5py.VirtualSource("base__external.h5",
                                             name=src_path,
                                             shape=(2, ),
                                             dtype=int)
        ext_group = h5.create_group("/ext")
        ext_group.create_virtual_dataset("virtual", layout)

        # External dataset split across two segments of the raw file.
        segments = [("base__external.dat", 0, 2 * 8),
                    ("base__external.dat", 4 * 8, 2 * 8)]
        ext_group.create_dataset("raw",
                                 shape=(2, 2),
                                 dtype=int,
                                 external=segments)

    return filename
def nxDataAddErrors(data, errors):
    """
    For each dataset in "data", link to the corresponding dataset in "errors".

    :param h5py.Group data:
    :param h5py.Group errors:
    """
    for name in data:
        dest = errors.get(name, None)
        # Test presence explicitly: `get` returns None only when the name is
        # absent, and truth-testing an h5py object is ambiguous (a scalar or
        # empty dataset can evaluate falsy), so `if dest:` could skip real data.
        if dest is not None:
            data[name + '_errors'] = h5py.SoftLink(dest.name)
def create_links(gsrc, gdes, ctx):
    """Re-create the links of group *gsrc* inside group *gdes*.

    Hard links are resolved through ctx["srcid_desobj_map"] (source object id
    hash -> already-copied destination object); soft and external links are
    re-created as h5py or h5pyd link objects depending on the destination
    file type.

    :param gsrc: source group (h5py or h5pyd)
    :param gdes: destination group (h5py or h5pyd)
    :param dict ctx: copy context; uses "srcid_desobj_map" and "verbose"
    """
    # add soft and external links
    srcid_desobj_map = ctx["srcid_desobj_map"]
    if ctx["verbose"]:
        print("create_links: {}".format(gsrc.name))
    for title in gsrc:
        if ctx["verbose"]:
            print("got link: {}".format(title))
        lnk = gsrc.get(title, getlink=True)
        link_classname = lnk.__class__.__name__
        if link_classname == "HardLink":
            logging.debug("Got hardlink: {} gsrc: {} gdes: {}".format(title, gsrc, gdes))
            if title not in gdes:
                msg = "creating multilink {} with title: {}".format(gdes, title)
                if ctx["verbose"]:
                    print(msg)
                logging.info(msg)
                src_obj_id = gsrc[title].id
                src_obj_id_hash = src_obj_id.__hash__()
                logging.debug("got src_obj_id hash: {}".format(src_obj_id_hash))
                if src_obj_id_hash in srcid_desobj_map:
                    des_obj = srcid_desobj_map[src_obj_id_hash]
                    logging.debug("creating hardlink to {}".format(des_obj.id.id))
                    gdes[title] = des_obj
                else:
                    msg = "could not find map item to src id: {}".format(src_obj_id_hash)
                    # FIX: logging.warn is a deprecated alias of logging.warning;
                    # use warning() consistently with the call at the bottom.
                    logging.warning(msg)
                    if ctx["verbose"]:
                        print("WARNING: " + msg)
        elif link_classname == "SoftLink":
            msg = "creating SoftLink({}) with title: {}".format(lnk.path, title)
            if ctx["verbose"]:
                print(msg)
            logging.info(msg)
            if is_h5py(gdes):
                soft_link = h5py.SoftLink(lnk.path)
            else:
                soft_link = h5pyd.SoftLink(lnk.path)
            gdes[title] = soft_link
        elif link_classname == "ExternalLink":
            msg = "creating ExternalLink({}, {}) with title: {}".format(lnk.filename, lnk.path, title)
            if ctx["verbose"]:
                print(msg)
            logging.info(msg)
            if is_h5py(gdes):
                ext_link = h5py.ExternalLink(lnk.filename, lnk.path)
            else:
                ext_link = h5pyd.ExternalLink(lnk.filename, lnk.path)
            gdes[title] = ext_link
        else:
            msg = "Unexpected link type: {}".format(link_classname)
            logging.warning(msg)
            if ctx["verbose"]:
                print(msg)
def add_keyvalue_group(self, dataset_name, data_name):
    """Create a key/value store group for *dataset_name* and soft-link it
    into every node of that dataset."""
    volumes_group, nodes_group = self._get_dataset_groups(dataset_name)

    # The group itself acts as the keyvalue store.
    volumes_group.create_group(data_name)

    # Every node sees the store through a soft link with the same name.
    link_target = volumes_group.name + '/' + data_name
    for node in nodes_group.values():
        node[data_name] = h5py.SoftLink(link_target)

    self._f.flush()
def add_property(self, _name_phenomenon, _name_property_set, name, values):
    """Add a 1D property named *name* with initial *values* to a property set.

    Every already-elapsed timestep is initialised with *values*. Field
    property sets get a resizable 3-D float dataset under a "value" group
    plus a soft link to the shared space discretization; other sets get a
    resizable 2-D int dataset named "value". A soft link to the shared time
    discretization is always added.

    :raises ValueError: wrapping any underlying error, tagged with *name*.
    """
    properties = self.get_property_set(
        _name_phenomenon, _name_property_set).get("properties")
    try:
        # Obtain the number of timesteps already recorded for this set
        prop = self.list_property_sets_prop(_name_phenomenon,
                                            _name_property_set)
        timesteps = self._current_timestep(_name_phenomenon,
                                           _name_property_set, prop[0])

        prop = properties.create_group(name)

        if self.is_field_pset(_name_phenomenon, _name_property_set):
            prop_val = prop.create_group("value")
            prop_val.create_dataset(
                "0", (timesteps, len(values), len(values[0])),
                dtype=numpy.float64,
                maxshape=(None, len(values), len(values[0])))
            v = prop_val["0"]
            prop["space discretization"] = h5py.SoftLink(
                "/phenomena/{0}/property_sets/{1}/properties/space discretization"
                .format(_name_phenomenon, _name_property_set))
        else:
            prop.create_dataset("value", (timesteps, len(values)),
                                dtype=numpy.int64,
                                maxshape=(None, len(values)))
            v = prop["value"]

        # Initialise all 'previous' timesteps.
        # FIX: xrange is Python-2-only (NameError on Python 3); range
        # iterates identically here.
        for ts in range(0, timesteps):
            v[ts, ...] = values

        prop["time discretization"] = h5py.SoftLink(
            "/phenomena/{0}/property_sets/{1}/properties/time discretization"
            .format(_name_phenomenon, _name_property_set))
    except Exception as e:
        raise ValueError("{} ({})".format(e, name))
def get_hdf5_with_external_recursive_links():
    """Return the path of a cached HDF5 fixture whose external links point at
    a twin file that links back, forming a recursive external-link chain."""
    global _file_cache
    ID = "external_recursive_links"
    if ID in _file_cache:
        return _file_cache[ID][0].name

    def _make_tmpfile():
        # Reserve a temp file name; h5py will re-create the actual file.
        t = tempfile.NamedTemporaryFile(prefix=ID + "_", suffix=".h5",
                                        delete=True)
        t.file.close()
        return t

    def _populate(own_path, twin_path):
        # Each file holds a real group/dataset plus links into its twin.
        h5 = h5py.File(own_path, "w")
        grp = h5.create_group("group")
        grp.create_dataset("dataset", data=numpy.int64(10))
        h5["soft_link_to_group"] = h5py.SoftLink("/group")
        h5["external_link_to_link"] = h5py.ExternalLink(twin_path,
                                                        "/soft_link_to_group")
        h5["external_link_to_recursive_link"] = h5py.ExternalLink(
            twin_path, "/external_link_to_recursive_link")
        h5.close()

    tmp_a = _make_tmpfile()
    tmp_b = _make_tmpfile()
    _populate(tmp_a.name, tmp_b.name)
    _populate(tmp_b.name, tmp_a.name)

    _file_cache[ID] = (tmp_a, tmp_b)
    return tmp_a.name
def _create_volume(self, dataset_name, uuid, dataname, volume_path, typename, instance_params):
    """Create a new, empty, resizable voxel volume dataset in the server's
    HDF5 file and soft-link it into the node's group.

    The dataset shape is (channels, 0, ..., 0): channel count and dtype come
    from the DVID typename, and the number of spatial axes is inferred from
    the 'VoxelSize' entry of *instance_params*.

    :raises self.RequestError: if 'VoxelSize' is missing from the params.
    """
    # Must read exact bytes.
    # Apparently rfile.read() just hangs.
    # NOTE(review): body_len is read but never used below — confirm whether
    # the request body should be drained here.
    body_len = self.headers.get("Content-Length")

    ## Current DVID API does not use metadata json for creating the volume.
    ## This may change soon...
    ##
    #metadata_json = self.rfile.read( int(body_len) )
    #try:
    #    voxels_metadata = VoxelsMetadata( metadata_json )
    #except ValueError as ex:
    #    raise self.RequestError( httplib.BAD_REQUEST, 'Can\'t create volume. '
    #                             'Error parsing volume metadata: {}\n'
    #                             'Invalid metadata response body was:\n{}'
    #                             ''.format( ex.args[0], metadata_json ) )
    #expected_typename = voxels_metadata.determine_dvid_typename()
    #if typename != expected_typename:
    #    raise self.RequestError( httplib.BAD_REQUEST,
    #                             "Cannot create volume. "
    #                             "REST typename was {}, but metadata JSON implies typename {}"
    #                             "".format( typename, expected_typename ) )

    # Instead, the json contains some other parameters that we don't really care about...
    # But we need to read at least one of them to determine the dimensionality of the data.
    try:
        num_axes = len(instance_params["VoxelSize"].split(','))
    except KeyError:
        raise self.RequestError(
            httplib.BAD_REQUEST,
            "Cannot create volume. Config data in message body is missing 'VoxelSize' parameter: \n"
            + str(instance_params))

    # Create the new volume in the appropriate 'volumes' group,
    # and then link to it in the node group.
    dtypename, channels = VoxelsMetadata.determine_channels_from_dvid_typename(
        typename)
    # Spatial axes start empty; maxshape=None on every axis lets the dataset
    # grow without bound as data is pushed.
    shape = (channels, ) + (0, ) * num_axes
    maxshape = (None, ) * len(shape)  # No maxsize
    dtype = numpy.dtype(dtypename)
    self.server.h5_file.create_dataset(volume_path,
                                       shape=shape,
                                       dtype=dtype,
                                       maxshape=maxshape)
    linkname = '/datasets/{dataset_name}/nodes/{uuid}/{dataname}'.format(
        **locals())
    self.server.h5_file[linkname] = h5py.SoftLink(volume_path)
    self.server.h5_file.flush()
def write_to_file(f, data):
    """Populate HDF5 file *f* with test groups, datasets and links, then
    close it.

    NOTE: the 3-D datasets are filled from the module-level ``data_3d``
    array, not from the *data* argument.
    """
    root = f.create_group('datasets_group')
    root.attrs['string_attr'] = 'my string attribute'
    root.attrs['int_attr'] = 123
    root.attrs['float_attr'] = 123.456

    # Floating-point datasets.
    floats = root.create_group('float')
    floats.create_dataset('float32', data=data, dtype='f4')
    floats.create_dataset('float64', data=data, dtype='f8', fillvalue=6)

    # Integer datasets of several widths.
    ints = root.create_group('int')
    ints.create_dataset('int8', data=data, dtype='i1')
    ints.create_dataset('int16', data=data, dtype='i2')
    ints.create_dataset('int32', data=data, dtype='i4')

    # One link of every flavour, working and broken.
    links = f.create_group('links_group')
    links['hard_link_to_int8'] = ints['int8']
    links['soft_link_to_int8'] = h5py.SoftLink('/datasets_group/int/int8')
    links['broken_soft_link'] = h5py.SoftLink(
        '/datasets_group/int/missing_dataset')
    links['soft_link_to_group'] = h5py.SoftLink('/datasets_group/int')
    # Define the external link path relative to this file, to ease testing
    links['external_link'] = h5py.ExternalLink('test_file_ext.hdf5',
                                               '/external_dataset')
    links['external_link_to_missing_file'] = h5py.ExternalLink(
        'missing_file.hdf5', '/external_dataset')

    # Multi-dimensional datasets.
    nd_datasets = f.create_group('nD_Datasets')
    nd_datasets.create_dataset('3D_float32', data=data_3d, dtype='f4')
    nd_datasets.create_dataset('3D_int32', data=data_3d, dtype='i4')

    f.flush()
    f.close()
def write(self, file_name, data_dictionary, data_format=None):
    """Append the entries of *data_dictionary* to an HDF5/CXI file.

    *data_format* maps dictionary keys to HDF5 paths (falling back to the
    key itself); afterwards the soft links and attributes required by SHARP
    are added if the relevant datasets exist.
    """
    if data_format is None:
        data_format = self.metadataFormat

    with h5py.File(file_name, 'a') as f:
        # SHARP requires these two entries to exist.
        if "cxi_version" not in f:
            f.create_dataset("cxi_version", data=140)
        if "entry_1/data_1/" not in f:
            f.create_group("entry_1/data_1")

        # data_format is used to generate hdf5 fields
        for key, value in data_dictionary.items():
            # Fields absent from the format table are stored under their own name.
            try:
                group = data_format[key][0]
            except KeyError:
                group = key + "/"
            if value is not None:
                f.create_dataset(group, data=value)

        # SHARP expects /entry_1/data_1/data to alias the detector data.
        if "entry_1/instrument_1/detector_1/data" in f \
                and "entry_1/data_1/data" not in f:
            f["entry_1/instrument_1/detector_1/data"].attrs[
                'axes'] = "translation:y:x"
            f["entry_1/data_1/data"] = h5py.SoftLink(
                "/entry_1/instrument_1/detector_1/data")

        # Likewise, translation is shared via soft links when not yet present.
        if "entry_1/sample_1/geometry_1/translation" in f \
                and "entry_1/data_1/translation" not in f \
                and "entry_1/instrument_1/detector_1/translation" not in f:
            f["entry_1/data_1/translation"] = h5py.SoftLink(
                "/entry_1/sample_1/geometry_1/translation")
            f["entry_1/instrument_1/detector_1/translation"] = h5py.SoftLink(
                "/entry_1/sample_1/geometry_1/translation")
def get_hdf5_with_all_links():
    """Return the path of a cached HDF5 fixture containing hard, soft and
    external links (both valid and dangling)."""
    global _file_cache
    ID = "alllinks"
    if ID in _file_cache:
        return _file_cache[ID].name

    # Reserve a temp file name; h5py re-creates the actual file.
    tmp = tempfile.NamedTemporaryFile(prefix=ID + "_", suffix=".h5",
                                      delete=True)
    tmp.file.close()

    h5 = h5py.File(tmp.name, "w")
    grp = h5.create_group("group")
    grp.create_dataset("dataset", data=numpy.int64(10))
    h5.create_dataset("dataset", data=numpy.int64(10))

    # Hard links to existing objects.
    h5["hard_link_to_group"] = h5["/group"]
    h5["hard_link_to_dataset"] = h5["/dataset"]

    # Soft links; the last one dangles on purpose.
    for name, target in (("soft_link_to_group", "/group"),
                         ("soft_link_to_dataset", "/dataset"),
                         ("soft_link_to_nothing", "/foo/bar/2000")):
        h5[name] = h5py.SoftLink(target)

    # External links into the "all types" fixture: valid group, valid
    # dataset, dangling path, and a missing file.
    alltypes_filename = get_hdf5_with_all_types()
    for name, filename, path in (
            ("external_link_to_group", alltypes_filename, "/arrays"),
            ("external_link_to_dataset", alltypes_filename, "/arrays/cube"),
            ("external_link_to_nothing", alltypes_filename, "/foo/bar/2000"),
            ("external_link_to_missing_file", "missing_file.h5", "/")):
        h5[name] = h5py.ExternalLink(filename, path)
    h5.close()

    _file_cache[ID] = tmp
    return tmp.name
def save_frame(self, data, name, prefix='targets'):
    """Save a frame to the data file.

    Serializes *data* under '/{prefix}/{name}' with pandas' to_hdf, then
    adds a top-level soft link named *name* that points at the serialized
    values block and labels its dimensions from the pandas axis names.

    NOTE(review): pd.Panel was removed in pandas 1.0, so the Panel branch
    only works with an older pinned pandas — confirm the project's version.

    :param data: pandas Series, DataFrame or Panel to store.
    :param str name: name of the frame (and of the created soft link).
    :param str prefix: HDF5 group under which the frame is stored.
    """
    logger.info('Writing %s', name)
    logger.debug('Writing data of shape %s to %s', data.shape,
                 self.data_file.filename)
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        if len(data.shape) > 2:
            data = data.transpose(
                2, 1, 0)  # panel serializes backwards for some reason...
        data.to_hdf(self.data_file.filename,
                    key='/{prefix}/{name}'.format(prefix=prefix, name=name),
                    **self.pytables_kws)
    # Link the raw values block under a stable top-level name and carry the
    # pandas axis names over as HDF5 dimension labels. The block path
    # ('values' vs 'block0_values') depends on how pandas serialized the type.
    if isinstance(data, pd.Series):
        self.data_file[name] = h5py.SoftLink(
            '/{prefix}/{name}/values'.format(prefix=prefix, name=name))
        self.data_file[name].dims[0].label = data.index.name
    elif isinstance(data, pd.DataFrame):
        self.data_file[name] = h5py.SoftLink(
            '/{prefix}/{name}/block0_values'.format(prefix=prefix,
                                                    name=name))
        self.data_file[name].dims[0].label = data.index.name
        self.data_file[name].dims[1].label = data.columns.name
    elif isinstance(data, pd.Panel):
        self.data_file[name] = h5py.SoftLink(
            '/{prefix}/{name}/block0_values'.format(prefix=prefix,
                                                    name=name))
        self.data_file[name].dims[
            0].label = data.minor_axis.name  # as panel serializes backwards
        self.data_file[name].dims[1].label = data.major_axis.name
        self.data_file[name].dims[2].label = data.items.name
def get_hdf5_with_recursive_links():
    """Return the path of a cached HDF5 fixture containing self-referencing
    hard and soft links."""
    ID = "recursive_links"
    if ID in _file_cache:
        return _file_cache[ID].name

    # Reserve a temp file name; h5py re-creates the actual file.
    tmp = tempfile.NamedTemporaryFile(prefix=ID + "_", suffix=".h5",
                                      delete=True)
    tmp.file.close()

    h5 = h5py.File(tmp.name, "w")
    grp = h5.create_group("group")
    grp.create_dataset("dataset", data=numpy.int64(10))
    h5.create_dataset("dataset", data=numpy.int64(10))

    # Hard-link cycle: /group ends up containing a hard link back to itself.
    h5["hard_recursive_link"] = h5["/group"]
    grp["recursive"] = h5["hard_recursive_link"]
    h5["hard_link_to_dataset"] = h5["/dataset"]

    # Soft links: plain, chained, and one that points at itself.
    h5["soft_link_to_group"] = h5py.SoftLink("/group")
    h5["soft_link_to_link"] = h5py.SoftLink("/soft_link_to_group")
    h5["soft_link_to_itself"] = h5py.SoftLink("/soft_link_to_itself")
    h5.close()

    _file_cache[ID] = tmp
    return tmp.name
def copy_exclude(key, ds, to):
    # function to copy a single entry within a HDF hierarchy, and do recursive calls
    # if required. If it finds its key in the exclusion patterns, just skips that entry.
    # NOTE(review): exclude_regex, print_skipped and np are presumably defined
    # in the enclosing scope — confirm against the surrounding file.
    for ek in exclude_regex:
        if ek.fullmatch(ds.name) is not None:
            if print_skipped:
                print(f'Skipping key {key} due to {ek}')
            return
    if isinstance(ds, h5py.Dataset):
        # Plain dataset: shallow copy handles data and attributes.
        to.copy(ds, key)
    elif isinstance(ds, h5py.Group) and 'table_type' in ds.attrs.keys():
        # pandas table is a group. Do NOT traverse into it (or experience infinite pain)
        # print(f'Copying table {key}')
        to.copy(ds, key)
    elif isinstance(ds, h5py.Group):
        # print(f'Creating group {key}')
        new_grp = to.require_group(key)
        # attribute copying. Lots of error catching required.
        try:
            for k, v in ds.attrs.items():
                try:
                    new_grp.attrs.create(k, v)
                except TypeError as err:
                    # Fall back to a fixed-width bytes representation of the value.
                    new_grp.attrs.create(k, np.string_(v))
        except OSError:
            # some newer HDF5 attribute types (used by pytables) will crash h5py even just listing them
            # print(f'Could not copy attributes of group {ds.name}')
            pass
        for k, v in ds.items():
            lnk = ds.get(k, getlink=True)
            if isinstance(lnk, h5py.SoftLink):
                # Soft links are re-created (not dereferenced). The for/else
                # re-creates the link only when NO exclude pattern matched its
                # target path; `continue` prevents recursing into the target.
                for ek in exclude_regex:
                    if ek.fullmatch(lnk.path) is not None:
                        if print_skipped:
                            print(f'Skipping soft link to {ek}')
                        break
                else:
                    new_grp[k] = h5py.SoftLink(lnk.path)
                continue
            copy_exclude(k, v, new_grp)
def create_block(self, blockid=None, groupid="global"):
    """Create a data block with the specified block ID.

    Each data block can store several chunks of information, and there
    can be an arbitrary number of data blocks per file.

    :param blockid: The ID for the new data block. If not given the
        blockid will be choosen automatically. The block ID has to be
        unique.
    :param groupid: The ID of the group the new block is attached to.
    :return: The block ID of the created block.
    """
    if self._srf is None:
        return

    if blockid is not None:
        # Validate a caller-supplied ID.
        if not str(blockid).isalnum():  # or str(blockid)[0].isdigit()):
            raise ValueError(
                "Block ID allows only characters A-Z, a-z and 0-9 and no leading digit."
            )
        if str(blockid) in self._block_ids:
            raise ValueError("Invalid or already used block ID: " +
                             str(blockid))
    else:
        # Auto-number: pick the smallest unused numeric ID.
        autonumber = 0
        while str(autonumber) in self._block_ids:
            autonumber += 1
        blockid = str(autonumber)

    self._block_ids.append(str(blockid))
    self._block_count += 1

    # Create the data block
    block_path = "/" + self._prefixb + str(blockid)
    self._srf.create_group(block_path)

    # Make sure the destination group exists before linking into it.
    if str(groupid) not in self._group_ids:
        self.create_group(groupid=groupid)

    # Put the data block into the group
    self._srf[block_path].attrs["group"] = str(groupid)
    self._srf["/" + self._prefixg + str(groupid) + "/" +
              str(blockid)] = hdf.SoftLink(block_path)

    return blockid