def read_crag(crag_path, solution_name='best-effort'):
    """Reads the CRAG (Candidate Region Adjacency Graph) data

    Parameters
    ----------
    crag_path : string
        Path where the CRAG is stored
    solution_name : string
        Name of the solution to retrieve. If no solution is wanted,
        set to None

    Returns
    ----------
    crag : Crag
        CRAG main structure
    volumes : CragVolumes
        Volumes in the CRAG
    crag_solution : CragSolution
        Requested solution, or None if solution_name is None
    ein : ExplicitVolume_f
        Intensity volume
    ebb : ExplicitVolume_f
        Boundary volume
    """
    # Check extension is correct
    if not dataio.valid_volume_path(crag_path):
        raise ValueError('Wrong extension for CRAG file {}. Must be a valid '
                         'HDF5 extension'.format(crag_path))

    print('Retrieving CRAG ...')

    # Read crag
    store = Hdf5CragStore(crag_path)
    crag = Crag()
    store.retrieveCrag(crag)

    # Volumes references
    volume_store = Hdf5VolumeStore(crag_path)

    # Get proposed solution, if requested
    if solution_name is None:
        crag_solution = None
    else:
        # TODO: check if name exists when wrapper bug is solved
        # solution_names = store.getSolutionNames()
        # if solution_name not in solution_names:
        #     raise ValueError('Requested {} is not in set '
        #                      '{}'.format(solution_name, solution_names))
        crag_solution = CragSolution(crag)
        store.retrieveSolution(crag, crag_solution, solution_name)

    # Read volumes
    volumes = CragVolumes(crag)
    store.retrieveVolumes(volumes)

    # Read the intensity and boundary volumes stored in the CRAG
    ebb = ExplicitVolume_f()
    ein = ExplicitVolume_f()
    volume_store.retrieveBoundaries(ebb)
    volume_store.retrieveIntensities(ein)

    return crag, volumes, crag_solution, ein, ebb
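# A minimal usage sketch of read_crag, not part of the project: it
# assumes a CRAG project file 'crag.h5' exists and contains a solution
# named 'best-effort'. The path, solution name and the helper below are
# illustrative only.
def _example_read_crag():
    crag, volumes, solution, ein, ebb = read_crag(
        'crag.h5', solution_name='best-effort')
    # Pass solution_name=None to skip solution retrieval; the returned
    # crag_solution is then None
    crag, volumes, solution, ein, ebb = read_crag('crag.h5',
                                                  solution_name=None)
    assert solution is None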
def read(self, num=None):
    """Reads image content from the dataset path. If an HDF5 file is
    provided, the group where images are stored must be given. Default
    image extensions are PNG, TIF and TIFF. Others can be provided

    :param num: Number of images to read from the dataset. Set to None
        to read all of them"""
    if not os.path.exists(self.path):
        raise ValueError('Path {} does not exist'.format(self.path))

    if os.path.isfile(self.path):
        if not dataio.valid_volume_path(self.path):
            raise ValueError('Invalid extension for file {}'.format(
                self.path))
        with h5py.File(self.path, 'r') as f:
            dataset = dataio.get_hf_group(f, self.group)
            if num is None:
                self.imgs = dataset[()]
            else:
                if num > dataset.shape[0]:
                    raise ValueError(
                        'Cannot read more images than the ones available')
                self.imgs = dataset[0:num]
            # Previous code loads all images in memory. Fix this for
            # big datasets when memory is limited
    else:
        reader = dataio.FileReader(self.path, self.exts)
        self.imgs = reader.read(num)

    if self.imgs.shape[0] == 0:
        raise ValueError('No data has been read')
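# The comment in read() flags that the whole HDF5 dataset is
# materialised in memory. A minimal sketch of the chunked alternative,
# using only plain h5py slicing; process_in_batches and fn are
# hypothetical names, not part of the project's API.
import h5py

def process_in_batches(path, group, batch_size, fn):
    """Stream an HDF5 image stack through fn in fixed-size batches
    instead of loading it all at once."""
    with h5py.File(path, 'r') as f:
        dataset = f[group]
        for start in range(0, dataset.shape[0], batch_size):
            # Slicing an h5py dataset reads only the requested rows
            fn(dataset[start:start + batch_size])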
def valid_input(self, d):
    """ Transforms the input into a compatible format: folders and
    HDF5 files are dispatched to their respective handlers """
    if os.path.isdir(d):
        return self.process_folder(d)
    elif dataio.valid_volume_path(d):
        return self.process_h5(d)
    else:
        raise IOError('Accepted formats are HDF5 files and folders')
def read_crag(self, crag):
    """ Reads an existing CRAG from its project file

    Params
    ---------
    crag: string
        Path to the CRAG project file
    """
    if not dataio.valid_volume_path(crag):
        raise ValueError('A valid HDF5 file must be provided')
    self.project_file = crag
def save_data(self, out_p, group='data/labels', min_digit=5,
              overwrite=True, int_data=False):
    """ Saves processed data. If a path to an HDF5 file is provided, a
    dataset is created inside the given group. Otherwise, images are
    dumped into the folder specified in the path.

    Params:
    ---------
    out_p: string
        Output path. If it corresponds to a valid HDF5 path, data is
        stored as an HDF5 dataset. Otherwise it is dumped as image
        files into a folder.
    group: string
        In case the output corresponds to an HDF5 file, path inside
        the file where data needs to be stored. Subgroups must be
        separated by a /. Not used if dumping into a folder.
    min_digit: integer
        Images are named, in order, with their position in the input
        volume. This number specifies the minimum amount of digits to
        use in the naming if dumping data into a folder. Not used for
        HDF5 files.
    overwrite: boolean
        Whether to overwrite existing data in the destination path
    int_data: boolean
        By default data is stored as float. If this field is True, it
        is stored as unsigned integer in .png files. Only used if the
        output path is a folder.
    """
    if os.path.exists(out_p) and not overwrite:
        return

    self._check_data()

    if dataio.valid_volume_path(out_p):
        with h5py.File(out_p, 'w') as f:
            dataio.store_hf_group(f, group, self.imgs)
    else:
        dataio.create_dir(out_p)
        if int_data is True:
            dataio.volume_to_folder(self.imgs, out_p, min_digit=min_digit,
                                    typ='uint8', ext='.png')
        else:
            dataio.volume_to_folder(self.imgs, out_p, min_digit=min_digit)
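# Hypothetical usage sketch for save_data: 'dataset' stands for any
# instance of the class above; output paths are illustrative.
def _example_save_data(dataset):
    # Valid HDF5 extension -> stored as a dataset under data/labels
    dataset.save_data('labels.h5', group='data/labels')
    # Anything else is treated as a folder -> uint8 .png dump
    dataset.save_data('labels_out', min_digit=5, int_data=True)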
def _init_output(self):
    """ Prepares the storage of the dataset output """
    if os.path.isfile(self.output_path):
        raise IOError('File {} already exists'.format(self.output_path))

    if not dataio.valid_volume_path(self.output_path):
        raise ValueError('Given output path is not a valid HDF5 file')

    # Create HDF5 file. Remember to close it at the end
    self.h5_file = h5py.File(self.output_path, 'w')

    # Initialize data as empty and resize it when needed. Maximum size:
    # unlimited along the first axis
    data_shape = (0, self.get_sections(), self.dims, self.conf.height,
                  self.conf.width)
    max_shape = (None, self.get_sections(), self.dims, self.conf.height,
                 self.conf.width)

    # Create datasets for data, labels, refs and channel maps
    self.h5_file.create_dataset(DATA_TAG, data_shape, compression='gzip',
                                chunks=True, maxshape=max_shape)
    self.h5_file.create_dataset(REF_TAG, (0,), compression='gzip',
                                chunks=True, maxshape=(None,))
    if self.store_label() is True:
        self.h5_file.create_dataset(LABEL_TAG, (0,), compression='gzip',
                                    maxshape=(None,), chunks=True)
    labels_type = h5py.special_dtype(vlen=str)
    labels_data = np.asarray(list(self.channel_map.keys()), dtype=object)
    self.h5_file.create_dataset('clabels', data=labels_data,
                                dtype=labels_type)
    self.h5_file.create_dataset('cpositions',
                                data=np.asarray(
                                    list(self.channel_map.values())))

    # Store metadata as file attributes
    self.h5_file.attrs.create('height', data=self.conf.height)
    self.h5_file.attrs.create('width', data=self.conf.width)
    self.h5_file.attrs.create('padding', data=self.conf.padding)
    self.h5_file.attrs.create('normalise', data=self.conf.normalise)
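# The datasets above are created empty with an unlimited first axis, so
# new samples must be appended by resizing. A minimal sketch of that
# pattern with plain h5py; append_rows is a hypothetical helper, not
# part of the project.
def append_rows(dataset, rows):
    """Grow a chunked, maxshape=(None, ...) HDF5 dataset along its
    first axis and write the new rows at the end."""
    old = dataset.shape[0]
    dataset.resize(old + rows.shape[0], axis=0)
    dataset[old:] = rows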