Example #1
    def test_softlinks(self):
        """ Broken softlinks are contained, but their members are not """
        self.f.create_group('grp')
        self.f['/grp/soft'] = h5py.SoftLink('/mongoose')
        self.f['/grp/external'] = h5py.ExternalLink('mongoose.hdf5',
                                                    '/mongoose')
        self.assertIn('/grp/soft', self.f)
        self.assertNotIn('/grp/soft/something', self.f)
        self.assertIn('/grp/external', self.f)
        self.assertNotIn('/grp/external/something', self.f)
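As a standalone illustration of what the test above asserts, here is a minimal
sketch (the file name links_demo.h5 is made up): a dangling SoftLink is itself
contained in the file, while dereferencing it with get() quietly returns None.

import h5py

# Minimal sketch: a dangling soft link is listed, but cannot be resolved.
with h5py.File('links_demo.h5', 'w') as f:
    f.create_group('grp')
    f['grp/soft'] = h5py.SoftLink('/mongoose')  # target does not exist
    print('/grp/soft' in f)            # True: the link itself is present
    print(f.get('grp/soft'))           # None: dereferencing fails quietly
    print('/grp/soft/something' in f)  # False: members are not reachable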
Example #2
def end_of_run():

    # Close CXI file
    W.close()

    if ipc.mpi.is_main_worker():

        # Reopen the CXI file to append additional information necessary
        # for ptychography datasets, see http://www.cxidb.org/cxi.html
        f = h5py.File(filename, "r+")

        # Already existing fields
        entry_1 = f['entry_1']
        instrument_1 = f['entry_1']['instrument_1']
        detector_1 = f['entry_1']['instrument_1']['detector_1']
        sample_1 = f['entry_1']['sample_1']
        geometry_1 = f['entry_1']['sample_1']['geometry_1']

        # Add new data fields
        f.create_dataset("cxi_version", data=140)
        source_1 = instrument_1.create_group("source_1")
        source_1.create_dataset("energy", data=photon_energy_J)  # in J
        detector_1.create_dataset("distance", data=det_distance)
        detector_1.create_dataset("x_pixel_size", data=det_pixelsize)
        detector_1.create_dataset("y_pixel_size", data=det_pixelsize)
        detector_1["translation"] = h5py.SoftLink(
            '/entry_1/sample_1/geometry_1/translation')
        detector_1.create_dataset("corner_position", data=corner_position)
        data_1 = entry_1.create_group("data_1")
        data_1["data"] = h5py.SoftLink('/entry_1/instrument_1/detector_1/data')
        data_1["translation"] = h5py.SoftLink(
            '/entry_1/sample_1/geometry_1/translation')

        # These are optional data that should be provided (if known)
        # ----------------------------------------------------------
        source_1.create_dataset("illumination", data=sim.get_illumination())
        #detector_1.create_dataset("Fillumination_mask", data=illumination_intensities_mask)
        #detector_1.create_dataset("solution", data=sim.obj)
        #detector_1.create_dataset("initial_image",data=initial_image)

        # Close CXI file and exit
        f.close()
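To convince yourself that the soft links created above behave as aliases of the
real datasets, write through the link and read back through the target. A
minimal sketch, reusing the paths from the example with made-up data:

import h5py
import numpy

# Sketch: writes through a soft link land on the target dataset.
with h5py.File('cxi_demo.h5', 'w') as f:
    f.create_dataset('entry_1/instrument_1/detector_1/data',
                     data=numpy.zeros((2, 4, 4)))
    f['entry_1/data_1/data'] = h5py.SoftLink(
        '/entry_1/instrument_1/detector_1/data')
    f['entry_1/data_1/data'][0, 0, 0] = 5.0
    print(f['entry_1/instrument_1/detector_1/data'][0, 0, 0])  # 5.0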
Example #3
    def createResource(cls, directory):
        filename = os.path.join(directory, "base.h5")
        extH5FileName = os.path.join(directory, "base__external.h5")
        extDatFileName = os.path.join(directory, "base__external.dat")

        externalh5 = h5py.File(extH5FileName, mode="w")
        externalh5["target/dataset"] = 50
        externalh5["target/link"] = h5py.SoftLink("/target/dataset")
        externalh5["/ext/vds0"] = [0, 1]
        externalh5["/ext/vds1"] = [2, 3]
        externalh5.close()

        numpy.array([0, 1, 10, 10, 2, 3]).tofile(extDatFileName)

        h5 = h5py.File(filename, mode="w")
        h5["group/dataset"] = 50
        h5["link/soft_link"] = h5py.SoftLink("/group/dataset")
        h5["link/soft_link_to_group"] = h5py.SoftLink("/group")
        h5["link/soft_link_to_link"] = h5py.SoftLink("/link/soft_link")
        h5["link/soft_link_to_file"] = h5py.SoftLink("/")
        h5["group/soft_link_relative"] = h5py.SoftLink("dataset")
        h5["link/external_link"] = h5py.ExternalLink(extH5FileName,
                                                     "/target/dataset")
        h5["link/external_link_to_link"] = h5py.ExternalLink(
            extH5FileName, "/target/link")
        h5["broken_link/external_broken_file"] = h5py.ExternalLink(
            extH5FileName + "_not_exists", "/target/link")
        h5["broken_link/external_broken_link"] = h5py.ExternalLink(
            extH5FileName, "/target/not_exists")
        h5["broken_link/soft_broken_link"] = h5py.SoftLink("/group/not_exists")
        h5["broken_link/soft_link_to_broken_link"] = h5py.SoftLink(
            "/group/not_exists")
        layout = h5py.VirtualLayout((2, 2), dtype=int)
        layout[0] = h5py.VirtualSource("base__external.h5",
                                       name="/ext/vds0",
                                       shape=(2, ),
                                       dtype=int)
        layout[1] = h5py.VirtualSource("base__external.h5",
                                       name="/ext/vds1",
                                       shape=(2, ),
                                       dtype=int)
        h5.create_group("/ext")
        h5["/ext"].create_virtual_dataset("virtual", layout)
        external = [("base__external.dat", 0, 2 * 8),
                    ("base__external.dat", 4 * 8, 2 * 8)]
        h5["/ext"].create_dataset("raw",
                                  shape=(2, 2),
                                  dtype=int,
                                  external=external)
        h5.close()

        return filename
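To inspect what createResource wrote without dereferencing anything,
get(..., getlink=True) returns the link objects themselves. A sketch, assuming
the base.h5 from above sits in the current directory:

import h5py

# Sketch: inspect link objects rather than their targets.
with h5py.File('base.h5', 'r') as h5:
    soft = h5.get('link/soft_link', getlink=True)
    print(type(soft).__name__, soft.path)  # SoftLink /group/dataset
    ext = h5.get('link/external_link', getlink=True)
    print(type(ext).__name__, ext.filename, ext.path)
    broken = h5.get('broken_link/soft_broken_link', getlink=True)
    print(broken.path)                     # /group/not_exists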
Example #4
def nxDataAddErrors(data, errors):
    """
    For each dataset in "data", link to the corresponding dataset in "errors".

    :param h5py.Group data:
    :param h5py.Group errors:
    """
    for name in data:
        dest = errors.get(name, None)
        if dest is not None:
            data[name + '_errors'] = h5py.SoftLink(dest.name)
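A hypothetical usage of nxDataAddErrors, with made-up file, group and dataset
names:

import h5py
import numpy

# Hypothetical call: pair every dataset in 'data' with its errors.
with h5py.File('nx_demo.h5', 'w') as f:
    data = f.create_group('data')
    errors = f.create_group('errors')
    data['signal'] = numpy.arange(5.0)
    errors['signal'] = numpy.full(5, 0.1)
    nxDataAddErrors(data, errors)
    # data now holds 'signal_errors', a soft link to /errors/signal
    print(f['data/signal_errors'][()])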
Example #5
def create_links(gsrc, gdes, ctx):
    # add soft and external links
    srcid_desobj_map = ctx["srcid_desobj_map"]
    if ctx["verbose"]:
        print("create_links: {}".format(gsrc.name))
    for title in gsrc:
        if ctx["verbose"]:
            print("got link: {}".format(title))
        lnk = gsrc.get(title, getlink=True)
        link_classname = lnk.__class__.__name__
        if link_classname == "HardLink":
            logging.debug("Got hardlink: {} gsrc: {} gdes: {}".format(title, gsrc, gdes))
            if title not in gdes:
                msg = "creating multilink {} with title: {}".format(gdes, title)
                if ctx["verbose"]:
                    print(msg)
                logging.info(msg)
                src_obj_id = gsrc[title].id
                src_obj_id_hash = src_obj_id.__hash__()
                logging.debug("got src_obj_id hash: {}".format(src_obj_id_hash))
                if src_obj_id_hash in srcid_desobj_map:
                    des_obj = srcid_desobj_map[src_obj_id_hash]
                    logging.debug("creating hardlink to {}".format(des_obj.id.id))
                    gdes[title] = des_obj
                else:
                    msg = "could not find map item to src id: {}".format(src_obj_id_hash)
                    logging.warning(msg)
                    if ctx["verbose"]:
                        print("WARNING: " + msg)
        elif link_classname == "SoftLink":
            msg = "creating SoftLink({}) with title: {}".format(lnk.path, title)
            if ctx["verbose"]:
                print(msg)
            logging.info(msg)
            if is_h5py(gdes):
                soft_link = h5py.SoftLink(lnk.path)
            else:
                soft_link = h5pyd.SoftLink(lnk.path)
            gdes[title] = soft_link
        elif link_classname == "ExternalLink":
            msg = "creating ExternalLink({}, {}) with title: {}".format(lnk.filename, lnk.path, title)
            if ctx["verbose"]:
                print(msg)
            logging.info(msg)
            if is_h5py(gdes):
                ext_link = h5py.ExternalLink(lnk.filename, lnk.path)
            else:
                ext_link = h5pyd.ExternalLink(lnk.filename, lnk.path)
            gdes[title] = ext_link
        else:
            msg = "Unexpected link type: {}".format(lnk.__class__.__name__)
            logging.warning(msg)
            if ctx["verbose"]:
                print(msg)
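The dispatch above hinges on get(title, getlink=True). In isolation the pattern
looks like this (file and member names are assumptions):

import h5py

# Sketch: getlink=True yields HardLink/SoftLink/ExternalLink objects.
with h5py.File('links_demo.h5', 'w') as f:
    f.create_dataset('a', data=1)
    f['b'] = h5py.SoftLink('/a')
    f['c'] = h5py.ExternalLink('other.h5', '/a')  # never dereferenced here
    for title in f:
        lnk = f.get(title, getlink=True)
        print(title, lnk.__class__.__name__)
# prints: a HardLink, b SoftLink, c ExternalLink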
Example #6
    def add_keyvalue_group(self, dataset_name, data_name):
        volumes_group, nodes_group = self._get_dataset_groups(dataset_name)

        # Create the group (i.e. the keyvalue store)
        volumes_group.create_group(data_name)

        # Add a link to this volume in every node
        for node in nodes_group.values():
            node[data_name] = h5py.SoftLink(volumes_group.name + '/' + data_name)

        self._f.flush()
Example #7
    def add_property(self, _name_phenomenon, _name_property_set, name, values):
        """ adds a 1d property """
        properties = self.get_property_set(
            _name_phenomenon, _name_property_set).get("properties")

        try:
            # Obtain the number of timesteps
            prop = self.list_property_sets_prop(_name_phenomenon,
                                                _name_property_set)
            timesteps = self._current_timestep(_name_phenomenon,
                                               _name_property_set, prop[0])

            prop = properties.create_group(name)

            if self.is_field_pset(_name_phenomenon, _name_property_set):
                prop_val = prop.create_group("value")
                prop_val.create_dataset(
                    "0", (timesteps, len(values), len(values[0])),
                    dtype=numpy.float64,
                    maxshape=(None, len(values), len(values[0])))
                v = prop_val["0"]
                prop["space discretization"] = h5py.SoftLink(
                    "/phenomena/{0}/property_sets/{1}/properties/space discretization"
                    .format(_name_phenomenon, _name_property_set))
            else:
                prop.create_dataset("value", (timesteps, len(values)),
                                    dtype=numpy.int64,
                                    maxshape=(None, len(values)))
                v = prop["value"]

            # Initialise all 'previous' timesteps
            for ts in range(timesteps):
                v[ts, ...] = values

            prop["time discretization"] = h5py.SoftLink(
                "/phenomena/{0}/property_sets/{1}/properties/time discretization"
                .format(_name_phenomenon, _name_property_set))

        except Exception as e:
            raise ValueError("{} ({})".format(e, name))
Example #8
def get_hdf5_with_external_recursive_links():
    global _file_cache
    ID = "external_recursive_links"
    if ID in _file_cache:
        return _file_cache[ID][0].name

    tmp1 = tempfile.NamedTemporaryFile(prefix=ID + "_",
                                       suffix=".h5",
                                       delete=True)
    tmp1.file.close()
    h5_1 = h5py.File(tmp1.name, "w")

    tmp2 = tempfile.NamedTemporaryFile(prefix=ID + "_",
                                       suffix=".h5",
                                       delete=True)
    tmp2.file.close()
    h5_2 = h5py.File(tmp2.name, "w")

    g = h5_1.create_group("group")
    g.create_dataset("dataset", data=numpy.int64(10))
    h5_1["soft_link_to_group"] = h5py.SoftLink("/group")
    h5_1["external_link_to_link"] = h5py.ExternalLink(tmp2.name,
                                                      "/soft_link_to_group")
    h5_1["external_link_to_recursive_link"] = h5py.ExternalLink(
        tmp2.name, "/external_link_to_recursive_link")
    h5_1.close()

    g = h5_2.create_group("group")
    g.create_dataset("dataset", data=numpy.int64(10))
    h5_2["soft_link_to_group"] = h5py.SoftLink("/group")
    h5_2["external_link_to_link"] = h5py.ExternalLink(tmp1.name,
                                                      "/soft_link_to_group")
    h5_2["external_link_to_recursive_link"] = h5py.ExternalLink(
        tmp1.name, "/external_link_to_recursive_link")
    h5_2.close()

    _file_cache[ID] = (tmp1, tmp2)
    return tmp1.name
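Dereferencing the cycle built above does not hang: the HDF5 library caps link
traversal (16 hops by default) and h5py typically surfaces the failure as a
KeyError. A sketch using the fixture function itself:

import h5py

# Sketch: resolving a recursive external link fails fast instead of looping.
path = get_hdf5_with_external_recursive_links()
with h5py.File(path, 'r') as h5:
    try:
        h5['external_link_to_recursive_link']
    except (KeyError, OSError) as e:
        print('traversal stopped:', e)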
Example #9
    def _create_volume(self, dataset_name, uuid, dataname, volume_path,
                       typename, instance_params):
        # Must read exact bytes.
        # Apparently rfile.read() just hangs.
        body_len = self.headers.get("Content-Length")

        ## Current DVID API does not use metadata json for creating the volume.
        ## This may change soon...
        ##
        #metadata_json = self.rfile.read( int(body_len) )
        #try:
        #    voxels_metadata = VoxelsMetadata( metadata_json )
        #except ValueError as ex:
        #    raise self.RequestError( httplib.BAD_REQUEST, 'Can\'t create volume.  '
        #                             'Error parsing volume metadata: {}\n'
        #                             'Invalid metadata response body was:\n{}'
        #                             ''.format( ex.args[0], metadata_json ) )
        #expected_typename = voxels_metadata.determine_dvid_typename()
        #if typename != expected_typename:
        #    raise self.RequestError( httplib.BAD_REQUEST,
        #                             "Cannot create volume.  "
        #                             "REST typename was {}, but metadata JSON implies typename {}"
        #                             "".format( typename, expected_typename ) )

        # Instead, the json contains some other parameters that we don't really care about...
        # But we need to read at least one of them to determine the dimensionality of the data.

        try:
            num_axes = len(instance_params["VoxelSize"].split(','))
        except KeyError:
            raise self.RequestError(
                httplib.BAD_REQUEST,
                "Cannot create volume.  Config data in message body is missing 'VoxelSize' parameter: \n"
                + str(instance_params))

        # Create the new volume in the appropriate 'volumes' group,
        #  and then link to it in the node group.
        dtypename, channels = VoxelsMetadata.determine_channels_from_dvid_typename(
            typename)
        shape = (channels, ) + (0, ) * num_axes
        maxshape = (None, ) * len(shape)  # No maxsize
        dtype = numpy.dtype(dtypename)
        self.server.h5_file.create_dataset(volume_path,
                                           shape=shape,
                                           dtype=dtype,
                                           maxshape=maxshape)
        linkname = '/datasets/{dataset_name}/nodes/{uuid}/{dataname}'.format(
            **locals())
        self.server.h5_file[linkname] = h5py.SoftLink(volume_path)
        self.server.h5_file.flush()
Example #10
def write_to_file(f, data):
    datasets_group = f.create_group('datasets_group')

    datasets_group.attrs['string_attr'] = 'my string attribute'
    datasets_group.attrs['int_attr'] = 123
    datasets_group.attrs['float_attr'] = 123.456

    float_group = datasets_group.create_group('float')
    float_group.create_dataset('float32', data=data, dtype='f4')
    float_group.create_dataset('float64', data=data, dtype='f8', fillvalue=6)

    int_group = datasets_group.create_group('int')
    int_group.create_dataset('int8', data=data, dtype='i1')
    int_group.create_dataset('int16', data=data, dtype='i2')
    int_group.create_dataset('int32', data=data, dtype='i4')

    links_group = f.create_group('links_group')
    links_group['hard_link_to_int8'] = int_group['int8']
    links_group['soft_link_to_int8'] = h5py.SoftLink(
        '/datasets_group/int/int8')
    links_group['broken_soft_link'] = h5py.SoftLink(
        '/datasets_group/int/missing_dataset')
    links_group['soft_link_to_group'] = h5py.SoftLink('/datasets_group/int')
    # Define the external link path relative to this file, to ease testing
    links_group['external_link'] = h5py.ExternalLink('test_file_ext.hdf5',
                                                     '/external_dataset')
    links_group['external_link_to_missing_file'] = h5py.ExternalLink(
        'missing_file.hdf5', '/external_dataset')

    multiDimensionDatasets = f.create_group('nD_Datasets')
    multiDimensionDatasets.create_dataset('3D_float32',
                                          data=data_3d,
                                          dtype='f4')
    multiDimensionDatasets.create_dataset('3D_int32', data=data_3d, dtype='i4')

    f.flush()
    f.close()
Example #11
    def write(self, file_name, data_dictionary, data_format=None):

        if data_format is None:
            data_format = self.metadataFormat

        with h5py.File(file_name, 'a') as f:

            # The block below is needed by SHARP
            if "cxi_version" not in f:
                f.create_dataset("cxi_version", data=140)

            if not "entry_1/data_1/" in f:
                f.create_group("entry_1/data_1")

            #data_format is used to generate hdf5 fields
            for key, value in data_dictionary.items():
                # Fields not covered by the formats above are still handled here
                try:
                    group = data_format[key][0]
                except KeyError:
                    group = key + "/"

                if value is not None:
                    f.create_dataset(group, data=value)

            # The block below is needed by SHARP
            if ("entry_1/instrument_1/detector_1/data" in f
                    and "entry_1/data_1/data" not in f):
                f["entry_1/instrument_1/detector_1/data"].attrs[
                    'axes'] = "translation:y:x"
                f["entry_1/data_1/data"] = h5py.SoftLink(
                    "/entry_1/instrument_1/detector_1/data")

            if "entry_1/sample_1/geometry_1/translation" in f and not "entry_1/data_1/translation" in f and not "entry_1/instrument_1/detector_1/translation" in f:
                f["entry_1/data_1/translation"] = h5py.SoftLink(
                    "/entry_1/sample_1/geometry_1/translation")
                f["entry_1/instrument_1/detector_1/translation"] = h5py.SoftLink(
                    "/entry_1/sample_1/geometry_1/translation")
Example #12
def get_hdf5_with_all_links():
    global _file_cache
    ID = "alllinks"
    if ID in _file_cache:
        return _file_cache[ID].name

    tmp = tempfile.NamedTemporaryFile(prefix=ID + "_",
                                      suffix=".h5",
                                      delete=True)
    tmp.file.close()
    h5 = h5py.File(tmp.name, "w")

    g = h5.create_group("group")
    g.create_dataset("dataset", data=numpy.int64(10))
    h5.create_dataset("dataset", data=numpy.int64(10))

    h5["hard_link_to_group"] = h5["/group"]
    h5["hard_link_to_dataset"] = h5["/dataset"]

    h5["soft_link_to_group"] = h5py.SoftLink("/group")
    h5["soft_link_to_dataset"] = h5py.SoftLink("/dataset")
    h5["soft_link_to_nothing"] = h5py.SoftLink("/foo/bar/2000")

    alltypes_filename = get_hdf5_with_all_types()

    h5["external_link_to_group"] = h5py.ExternalLink(alltypes_filename,
                                                     "/arrays")
    h5["external_link_to_dataset"] = h5py.ExternalLink(alltypes_filename,
                                                       "/arrays/cube")
    h5["external_link_to_nothing"] = h5py.ExternalLink(alltypes_filename,
                                                       "/foo/bar/2000")
    h5["external_link_to_missing_file"] = h5py.ExternalLink(
        "missing_file.h5", "/")
    h5.close()

    _file_cache[ID] = tmp
    return tmp.name
Example #13
    def save_frame(self, data, name, prefix='targets'):
        """ Save the a frame to the data file. """

        logger.info('Writing %s', name)
        logger.debug('Writing data of shape %s to %s', data.shape,
                     self.data_file.filename)

        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            if len(data.shape) > 2:
                data = data.transpose(
                    2, 1, 0)  # panel serializes backwards for some reason...
            data.to_hdf(self.data_file.filename,
                        key='/{prefix}/{name}'.format(prefix=prefix,
                                                      name=name),
                        **self.pytables_kws)

        if isinstance(data, pd.Series):
            self.data_file[name] = h5py.SoftLink(
                '/{prefix}/{name}/values'.format(prefix=prefix, name=name))
            self.data_file[name].dims[0].label = data.index.name

        elif isinstance(data, pd.DataFrame):
            self.data_file[name] = h5py.SoftLink(
                '/{prefix}/{name}/block0_values'.format(prefix=prefix,
                                                        name=name))
            self.data_file[name].dims[0].label = data.index.name
            self.data_file[name].dims[1].label = data.columns.name

        elif isinstance(data, pd.Panel):
            self.data_file[name] = h5py.SoftLink(
                '/{prefix}/{name}/block0_values'.format(prefix=prefix,
                                                        name=name))
            self.data_file[name].dims[
                0].label = data.minor_axis.name  # as panel serializes backwards
            self.data_file[name].dims[1].label = data.major_axis.name
            self.data_file[name].dims[2].label = data.items.name
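The dims[...].label assignments above use h5py's dimension-scales API. Stripped
of the pandas machinery, the idea is just this (names made up):

import h5py
import numpy

# Sketch: labelling dataset dimensions via the dimension-scales API.
with h5py.File('dims_demo.h5', 'w') as f:
    d = f.create_dataset('values', data=numpy.zeros((3, 4)))
    d.dims[0].label = 'row'
    d.dims[1].label = 'col'
    print(d.dims[0].label, d.dims[1].label)  # row col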
Example #14
def get_hdf5_with_recursive_links():
    ID = "recursive_links"
    if ID in _file_cache:
        return _file_cache[ID].name

    tmp = tempfile.NamedTemporaryFile(prefix=ID + "_", suffix=".h5", delete=True)
    tmp.file.close()
    h5 = h5py.File(tmp.name, "w")

    g = h5.create_group("group")
    g.create_dataset("dataset", data=numpy.int64(10))
    h5.create_dataset("dataset", data=numpy.int64(10))

    h5["hard_recursive_link"] = h5["/group"]
    g["recursive"] = h5["hard_recursive_link"]
    h5["hard_link_to_dataset"] = h5["/dataset"]

    h5["soft_link_to_group"] = h5py.SoftLink("/group")
    h5["soft_link_to_link"] = h5py.SoftLink("/soft_link_to_group")
    h5["soft_link_to_itself"] = h5py.SoftLink("/soft_link_to_itself")
    h5.close()

    _file_cache[ID] = tmp
    return tmp.name
Example #15
        def copy_exclude(key, ds, to):
            # Copy a single entry within an HDF5 hierarchy, recursing where
            # required. Entries matching an exclusion pattern are skipped.

            for ek in exclude_regex:
                if ek.fullmatch(ds.name) is not None:
                    if print_skipped:
                        print(f'Skipping key {key} due to {ek}')
                    return

            if isinstance(ds, h5py.Dataset):
                to.copy(ds, key)

            elif isinstance(ds,
                            h5py.Group) and 'table_type' in ds.attrs.keys():
                # pandas table is a group. Do NOT traverse into it (or experience infinite pain)
                # print(f'Copying table {key}')
                to.copy(ds, key)

            elif isinstance(ds, h5py.Group):
                # print(f'Creating group {key}')
                new_grp = to.require_group(key)

                # attribute copying. Lots of error catching required.
                try:
                    for k, v in ds.attrs.items():
                        try:
                            new_grp.attrs.create(k, v)
                        except TypeError:
                            # fall back to a fixed-length bytes string
                            new_grp.attrs.create(k, np.bytes_(v))
                except OSError:
                    # some newer HDF5 attribute types (used by pytables) will crash h5py even just listing them
                    # print(f'Could not copy attributes of group {ds.name}')
                    pass

                for k, v in ds.items():
                    lnk = ds.get(k, getlink=True)
                    if isinstance(lnk, h5py.SoftLink):
                        for ek in exclude_regex:
                            if ek.fullmatch(lnk.path) is not None:
                                if print_skipped:
                                    print(f'Skipping soft link to {ek}')
                                break
                        else:
                            new_grp[k] = h5py.SoftLink(lnk.path)
                        continue

                    copy_exclude(k, v, new_grp)
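The for/else in the soft-link branch is easy to misread: the else body runs only
when the loop finished without break, i.e. when no exclusion pattern matched the
link path. A tiny standalone sketch of that control flow:

import re

# Sketch: for/else executes the else-branch only if no break occurred.
exclude_regex = [re.compile('a.*'), re.compile('b.*')]
for ek in exclude_regex:
    if ek.fullmatch('banana') is not None:
        print('excluded by', ek.pattern)
        break
else:
    print('kept')  # reached only when nothing matched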
Example #16
    def create_block(self, blockid=None, groupid="global"):
        """Create a data block with the specified block ID. Each data block can
        store several chunks of information, and there can be an arbitrary number
        of data blocks per file.

        :param blockid: The ID for the new data block. If not given, the block ID
                        will be chosen automatically. The block ID has to be unique.
        :param groupid: The ID of the group the new block is assigned to.
        :return: The block ID of the created block.
        """
        if self._srf is None:
            return

        if blockid is not None and (
                not str(blockid).isalnum()):  # or str(blockid)[0].isdigit()):
            raise ValueError(
                "Block ID allows only the characters A-Z, a-z and 0-9."
            )

        if blockid is not None and str(blockid) in self._block_ids:
            raise ValueError("Invalid or already used block ID: " +
                             str(blockid))

        if blockid is None:
            # Try to find a valid autonumber
            autonumber = 0
            while str(autonumber) in self._block_ids:
                autonumber += 1
            blockid = str(autonumber)

        self._block_ids.append(str(blockid))
        self._block_count += 1

        # Create the data block
        self._srf.create_group("/" + self._prefixb + str(blockid))

        # Does the group already exist?
        if not str(groupid) in self._group_ids:
            self.create_group(groupid=groupid)

        # Put the data block into the group
        self._srf["/" + self._prefixb +
                  str(blockid)].attrs["group"] = str(groupid)
        self._srf["/" + self._prefixg + str(groupid) + "/" +
                  str(blockid)] = hdf.SoftLink("/" + self._prefixb +
                                               str(blockid))

        return blockid