def write_simple_attrs(h5_obj, attrs, verbose=False):
    """
    Writes attributes to a h5py object

    Parameters
    ----------
    h5_obj : :class:`h5py.File`, :class:`h5py.Group`, or h5py.Dataset object
        h5py object to which the attributes will be written to
    attrs : dict
        Dictionary containing the attributes as key-value pairs.
        Entries whose key is not a string are skipped with a warning,
        entries whose value is None are skipped silently, and Enum values
        are replaced by their ``name`` before being written.
    verbose : bool, optional. Default=False
        Whether or not to print debugging statements

    Raises
    ------
    TypeError
        If ``attrs`` is not a dict or if ``h5_obj`` is not a h5py File,
        Group or Dataset object
    ValueError
        If any value in ``attrs`` is itself a dict (nested dictionary)
    """
    if not isinstance(attrs, dict):
        raise TypeError('attrs should be a dictionary but is instead of type '
                        '{}'.format(type(attrs)))
    if not isinstance(h5_obj, (h5py.File, h5py.Group, h5py.Dataset)):
        raise TypeError('h5_obj should be a h5py File, Group or Dataset object'
                        ' but is instead of type '
                        '{}'.format(type(h5_obj)))

    for key, val in attrs.items():
        # NOTE(review): `unicode` is expected to be provided at module level
        # (e.g. aliased to `str` on python 3) - confirm against file header
        if not isinstance(key, (str, unicode)):
            warn('Skipping attribute with key: {}. Expected str, got {}'
                 ''.format(key, type(key)))
            continue

        # Get rid of leading / trailing whitespace in the key
        key = key.strip()

        # Nothing to write for empty values
        if val is None:
            continue

        if isinstance(val, Enum):
            if verbose:
                print('taking the name: {} of Enum: {}'.format(val.name, val))
            val = val.name

        # Only flat dictionaries can be written as attributes
        if isinstance(val, dict):
            raise ValueError('provided dictionary was nested, not flat. '
                             'Flatten dictionary using sidpy.base.dict_utils.'
                             'flatten_dict before calling sidpy.hdf.hdf_utils.'
                             'write_simple_attrs')

        if verbose:
            print('Writing attribute: {} with value: {}'.format(key, val))
        clean_val = clean_string_att(val)
        if verbose:
            print('Attribute cleaned into: {}'.format(clean_val))
        h5_obj.attrs[key] = clean_val

    if verbose:
        print('Wrote all (simple) attributes to {}: {}\n'
              ''.format(type(h5_obj), h5_obj.name.split('/')[-1]))
def clean_string_att(att_val):
    """
    Deprecated wrapper that forwards to ``sut.clean_string_att``.

    Replaces any unicode objects within lists with their string counterparts
    to ensure compatibility with python 3. If the attribute is indeed a list
    of unicodes, the changes will be made in-place. The actual cleaning is
    performed by the function this wrapper delegates to.

    A FutureWarning is emitted on every call to announce that this copy will
    be removed.

    Parameters
    ----------
    att_val : object
        Attribute object

    Returns
    -------
    att_val : object
        Attribute object
    """
    # Note the trailing space after "will": adjacent string literals are
    # concatenated verbatim, so it is needed to keep the words apart
    warn('pyUSID.io.write_utils.clean_string_att has been moved to '
         'sidpy.base.string_utils.clean_string_att. This copy in pyUSID will '
         'be removed in future release. Please update your import statements',
         FutureWarning)
    return sut.clean_string_att(att_val)
def copy_main_attributes(h5_main, h5_new):
    """
    Transfers the 'quantity' and 'units' attributes from one dataset to
    another

    Parameters
    ----------
    h5_main : h5py.Dataset
        Dataset that holds the attributes to be read
    h5_new : h5py.Dataset
        Dataset that receives the attributes

    Raises
    ------
    TypeError
        If either argument is not a h5py.Dataset
    KeyError
        If 'quantity' or 'units' is missing from ``h5_main``. Attributes are
        handled one at a time, so an attribute found earlier will already
        have been written to ``h5_new`` when this is raised.
    """
    # Validate the source first, then the destination
    for label, h5_obj in (('h5_main', h5_main), ('h5_new', h5_new)):
        if isinstance(h5_obj, h5py.Dataset):
            continue
        raise TypeError(label + ' should be a h5py.Dataset object')

    for att_name in ('quantity', 'units'):
        if att_name in h5_main.attrs:
            raw_val = get_attr(h5_main, att_name)
            h5_new.attrs[att_name] = clean_string_att(raw_val)
        else:
            raise KeyError('Attribute: {} does not exist in {}'
                           ''.format(att_name, h5_main))
def write_region_references(h5_dset, reg_ref_dict, add_labels_attr=True,
                            verbose=False):
    """
    Creates attributes of a h5py.Dataset that refer to regions in the dataset

    Parameters
    ----------
    h5_dset : h5.Dataset instance
        Dataset to which region references will be added as attributes
    reg_ref_dict : dict
        Names of the region references mapped to their slicing information.
        The slicing information must be formatted using tuples of slice
        objects. For example {'region_1':(slice(None, None), slice (0,1))}.
        This dictionary is not modified by this function.
    add_labels_attr : bool, optional, default = True
        Whether or not to write an attribute named 'labels' with the names of
        the region references. Each name is placed at the position given by
        the ``start`` of its slice along the first dimension (of the first
        entry) whose ``start`` is not None. Skipped when ``reg_ref_dict`` is
        empty.
    verbose : Boolean (Optional. Default = False)
        Whether or not to print status messages

    Raises
    ------
    TypeError
        If ``reg_ref_dict`` is not a dict or ``h5_dset`` is not a
        h5py.Dataset
    """
    if not isinstance(reg_ref_dict, dict):
        raise TypeError('slices should be a dictionary but is instead of type '
                        '{}'.format(type(reg_ref_dict)))
    if not isinstance(h5_dset, h5py.Dataset):
        raise TypeError('h5_dset should be a h5py.Dataset object but is '
                        'instead of type {}'.format(type(h5_dset)))

    if verbose:
        print('Starting to write Region References to Dataset', h5_dset.name,
              'of shape:', h5_dset.shape)

    for reg_ref_name, reg_ref_tuple in reg_ref_dict.items():
        if verbose:
            print('About to write region reference:', reg_ref_name, ':',
                  reg_ref_tuple)

        reg_ref_tuple = clean_reg_ref(h5_dset, reg_ref_tuple, verbose=verbose)

        h5_dset.attrs[reg_ref_name] = h5_dset.regionref[reg_ref_tuple]

        if verbose:
            print('Wrote Region Reference:%s' % reg_ref_name)

    # Next, write the names of the region references as an attribute called
    # 'labels' that is a list of strings.
    # An empty dictionary has no names to write, so the step is skipped
    # instead of failing while looking up the first entry.
    if add_labels_attr and reg_ref_dict:
        # Work on a normalized local copy so that the dictionary supplied by
        # the caller is left untouched: bare slices become one-element lists
        slices_by_name = {
            name: (slicing if isinstance(slicing, (list, tuple))
                   else [slicing])
            for name, slicing in reg_ref_dict.items()}

        # First ascertain the dimension of the slicing using the first entry
        found_dim = False
        dimen_index = None
        first_slicing = next(iter(slices_by_name.values()))
        for dimen_index, slice_obj in enumerate(first_slicing):
            # We make the assumption that checking the start is sufficient
            if slice_obj.start is not None:
                found_dim = True
                break

        if found_dim:
            # The list that will hold all the names
            headers = [None] * len(slices_by_name)
            for col_name, slicing in slices_by_name.items():
                # The start index of each slice decides the position of the
                # corresponding name within the list of labels
                headers[slicing[dimen_index].start] = col_name
            if verbose:
                print('Writing header attributes: {}'.format('labels'))
            # Now write the list of col / row names as an attribute:
            h5_dset.attrs['labels'] = clean_string_att(headers)
        else:
            warn('Unable to write region references for {}'
                 ''.format(h5_dset.name.split('/')[-1]))

    if verbose:
        print('Wrote Region References of Dataset {}'
              ''.format(h5_dset.name.split('/')[-1]))
def copy_attributes(source, dest, skip_refs=True, verbose=False):
    """
    Duplicates the attributes of one h5py object onto another

    The 'DIMENSION_LIST' attribute is never copied. Region references are
    currently never copied either since that step is disabled (see the TODO
    in the body).

    Parameters
    ----------
    source : h5py.Dataset, :class:`h5py.Group`, or :class:`h5py.File`
        Object whose attributes are read
    dest : h5py.Dataset, :class:`h5py.Group`, or :class:`h5py.File`
        Object that receives the attributes
    skip_refs : bool, optional. default = True
        Whether or not the references (dataset and region) should be skipped.
        Dataset references are also skipped whenever the same-file check on
        ``source`` and ``dest`` raises a ValueError.
    verbose : bool, optional. Default = False
        Whether or not to print logs for debugging

    Returns
    -------
    dest : h5py.Dataset, :class:`h5py.Group`, or :class:`h5py.File`
        The same ``dest`` object that was passed in

    Raises
    ------
    TypeError
        If ``source`` or ``dest`` is not a h5py Dataset, Group or File
    """
    h5_types = (h5py.Dataset, h5py.Group, h5py.File)
    mesg = 'should be a h5py.Dataset, h5py.Group,or h5py.File object'
    if not isinstance(source, h5_types):
        raise TypeError('source ' + mesg)
    if not isinstance(dest, h5_types):
        raise TypeError('dest ' + mesg)

    # Dataset references are only meaningful within a single file
    skip_dset_refs = skip_refs
    try:
        validate_h5_objs_in_same_h5_file(source, dest)
    except ValueError:
        if not skip_refs:
            warn('Dataset references will not be copied since {} and {} are '
                 'in different files'.format(source, dest))
        skip_dset_refs = True

    for att_name in source.attrs.keys():
        if att_name in ['DIMENSION_LIST']:
            continue

        att_val = get_attr(source, att_name)

        # Region references are handled in the dedicated section below
        if isinstance(att_val, h5py.RegionReference):
            continue

        # Don't copy (dataset) references unless asked
        if isinstance(att_val, h5py.Reference):
            if not skip_dset_refs:
                if verbose:
                    print('dset ref copying ' + att_name)
                dest.attrs[att_name] = att_val
            continue

        # everything else
        if verbose:
            print('simple copying ' + att_name)
        dest.attrs[att_name] = clean_string_att(att_val)

    if not skip_refs:
        # This can be copied across files without problems
        mesg = 'Could not copy region references to {}.'.format(dest.name)
        if not isinstance(dest, h5py.Dataset):
            warn('Cannot copy region references to {}'.format(type(dest)))
        else:
            try:
                if verbose:
                    print('requested reg ref copy')
                # copy_region_refs(source, dest)
                pass  # TODO: activate again
            except TypeError:
                warn(mesg)

    return dest