def write_thread(out_file_path: Path,
                 data: np.ndarray,
                 metadata: OmeXml,
                 chan_name: str):
    """Save one field of view as its own image file.

    This function is intended to be run inside a threadpool.

    Args:
        out_file_path (Path): Path to the output file
        data (np.ndarray): FOV to save; axis 0 is used as the Y size and
            axis 1 as the X size of the output
        metadata (OmeXml): Metadata handed to the writer
        chan_name (str): Name recorded for the single output channel
    """
    ProcessManager.log(f'Writing: {out_file_path.name}')

    with BioWriter(out_file_path, metadata=metadata) as writer:
        n_rows, n_cols = data.shape[0], data.shape[1]

        # Output is a single z-slice with a single channel
        writer.X = n_cols
        writer.Y = n_rows
        writer.Z = 1
        writer.C = 1
        writer.cnames = [chan_name]

        writer[:] = data
def main(inpDir, outDir, projection, method):
    """Submit one projection process per ``.ome.tif`` image in ``inpDir``.

    The function always terminates the interpreter through ``sys.exit``.
    The exit status is 0 only when every process was submitted and joined
    without an exception; otherwise the traceback is printed and the exit
    status is 1 so that callers can detect the failure.

    Args:
        inpDir: Directory containing the input images
        outDir: Directory receiving one output image per input image
        projection: Projection function forwarded to ``process_image``
        method: Reduction forwarded to ``process_image``
    """
    # images in the input directory
    inpDir_files = [
        filename for filename in os.listdir(inpDir)
        if filename.endswith('.ome.tif')
    ]

    # Assume failure until the whole workflow has completed
    exit_code = 1

    # Surround with try/finally for proper error catching
    try:
        for image_name in inpDir_files:
            input_img_path = os.path.join(inpDir, image_name)
            output_img_path = os.path.join(outDir, image_name)

            ProcessManager.submit_process(process_image,
                                          input_img_path,
                                          output_img_path,
                                          projection,
                                          method)

        ProcessManager.join_processes()
        exit_code = 0

    except Exception:
        traceback.print_exc()

    finally:
        # Exit the program
        logger.info('Exiting the workflow..')
        sys.exit(exit_code)
def process_image(input_img_path, output_img_path, projection, method):
    """Project one image tile by tile.

    The image is split into ``tile_size`` x ``tile_size`` tiles in x and y;
    one thread running ``projection`` is submitted per tile, and all threads
    are joined before the reader and writer are closed.

    Args:
        input_img_path: Path of the image to read
        output_img_path: Path of the image to write
        projection: Callable invoked as
            ``projection(br, bw, (x, x_max), (y, y_max), method=method)``
        method: Value forwarded to ``projection`` as the ``method`` keyword
    """
    # Grab a free process
    with ProcessManager.process():

        # initialize bioreader and biowriter
        with BioReader(input_img_path,
                       max_workers=ProcessManager._active_threads) as br:
            with BioWriter(output_img_path,
                           metadata=br.metadata,
                           max_workers=ProcessManager._active_threads) as bw:

                # output image is 2d
                bw.Z = 1

                # iterate along the x,y direction
                for x_start in range(0, br.X, tile_size):
                    x_bounds = (x_start, min(br.X, x_start + tile_size))

                    for y_start in range(0, br.Y, tile_size):
                        y_bounds = (y_start, min(br.Y, y_start + tile_size))

                        ProcessManager.submit_thread(projection,
                                                     br,
                                                     bw,
                                                     x_bounds,
                                                     y_bounds,
                                                     method=method)

                # Wait for every tile before the files are closed
                ProcessManager.join_threads()
def write_slide(self):
    """Run ``self._write_slide`` in a thread inside a managed process.

    The process is labelled ``"<base_path> - <output_depth>"``, and the
    method returns once the submitted thread has been joined.
    """
    job_label = f'{self.base_path} - {self.output_depth}'

    with ProcessManager.process(job_label):
        ProcessManager.submit_thread(self._write_slide)
        ProcessManager.join_threads()
def unshade_image(img, out_dir, brightfield, darkfield, photobleach=None, offset=None):
    """Apply flatfield correction to one image and save the result.

    The first z-slice of channel 0 / timepoint 0 is read and corrected as
    ``(image - darkfield) / brightfield``. ``photobleach`` is then
    subtracted and ``offset`` added when they are provided. Negative values
    are clipped to zero and the result is cast back to the input dtype.

    Args:
        img: Path of the input image; ``img.name`` is reused as output name
        out_dir: Path of the output directory
        brightfield: Flatfield values the image is divided by
        darkfield: Darkfield values subtracted from the image
        photobleach: Optional value subtracted after the division
        offset: Optional value added after the photobleach correction
    """
    with ProcessManager.thread() as active_threads:
        with BioReader(img, max_workers=active_threads.count) as br:
            with BioWriter(out_dir.joinpath(img.name),
                           metadata=br.metadata,
                           max_workers=active_threads.count) as bw:

                new_img = br[:, :, :1, 0, 0].squeeze().astype(np.float32)

                new_img = new_img - darkfield
                new_img = np.divide(new_img, brightfield)

                # Identity checks: ``!= None`` is ambiguous for array-likes
                if photobleach is not None:
                    new_img = new_img - np.float32(photobleach)
                if offset is not None:
                    new_img = new_img + np.float32(offset)

                # Clip before casting back to the input dtype
                new_img[new_img < 0] = 0
                new_img = new_img.astype(br.dtype)

                bw[:] = new_img
def mean_projection(br, bw, x_range, y_range, **kwargs):
    """ Calculate the mean intensity projection of one x/y section

    The section is summed over z in blocks of ``tile_size_z`` slices, divided
    by ``br.Z``, cast to ``br.dtype`` and written to ``bw``. Nothing is
    returned.

    Args:
        br (BioReader object): input file object
        bw (BioWriter object): output file object
        x_range (tuple): x-range of the img to be processed
        y_range (tuple): y-range of the img to be processed
        **kwargs: accepted for call compatibility, not used
    """
    with ProcessManager.thread():
        br.max_workers = ProcessManager._active_threads
        bw.max_workers = ProcessManager._active_threads

        # x,y range of the volume
        x_lo, x_hi = x_range
        y_lo, y_hi = y_range

        # running sum over depth, accumulated in float64
        running_sum = np.zeros((y_hi - y_lo, x_hi - x_lo), dtype=np.float64)
        for z_lo in range(0, br.Z, tile_size_z):
            z_hi = min(br.Z, z_lo + tile_size_z)
            block = br[y_lo:y_hi, x_lo:x_hi, z_lo:z_hi, ...].astype(np.float64)
            running_sum += np.sum(block, axis=2).squeeze()

        # output image
        running_sum /= br.Z
        bw[y_lo:y_hi, x_lo:x_hi, 0:1, 0, 0] = running_sum.astype(br.dtype)
def main(input_dir: pathlib.Path,
         file_pattern: str,
         output_dir: pathlib.Path
         ) -> None:
    """Submit one ``_merge_layers`` process per group of files.

    Files in ``input_dir`` matching ``file_pattern`` are iterated with
    ``group_by='z'``. Each group is passed to ``_merge_layers`` together with
    an output path built from ``fp.output_name`` for that group.

    Args:
        input_dir: Directory containing the input images
        file_pattern: Pattern used to parse the file names
        output_dir: Directory receiving the outputs
    """
    # create the filepattern object
    pattern = filepattern.FilePattern(input_dir, file_pattern)

    for group in pattern(group_by='z'):
        target = output_dir.joinpath(pattern.output_name(group))
        ProcessManager.submit_process(_merge_layers, group, target)

    ProcessManager.join_processes()
def assemble_image(vector_path: pathlib.Path, out_path: pathlib.Path, depth: int) -> None:
    """Assemble a 2d or 3d image

    This method assembles one image from one stitching vector. It can
    assemble both 2d and z-stacked 3d images. It is intended to run as a
    process to parallelize stitching of multiple images.

    The basic approach to stitching is:
    1. Parse the stitching vector and abstract the image dimensions
    2. Generate a thread for each subsection (supertile) of an image.

    The output writer is closed in a ``finally`` clause so the file handle is
    released even when assembly fails.

    Args:
        vector_path: Path to the stitching vector
        out_path: Path to the output directory
        depth: depth of the input images
    """
    # Grab a free process
    with ProcessManager.process():

        # Parse the stitching vector
        parsed_vector = _parse_stitch(vector_path, timesliceNaming)
        width = parsed_vector['width']
        height = parsed_vector['height']

        # Initialize the output image from the first input's metadata
        with BioReader(parsed_vector['filePos'][0]['file']) as br:
            bw = BioWriter(out_path.joinpath(parsed_vector['name']),
                           metadata=br.metadata,
                           max_workers=ProcessManager._active_threads)

        try:
            bw.x = width
            bw.y = height
            bw.z = depth

            # Assemble the images
            ProcessManager.log('Begin assembly')

            for z in range(depth):
                ProcessManager.log(f'Assembling Z position : {z}')

                for x in range(0, width, chunk_size):
                    # max x-pixel index in the assembled image
                    X_range = min(x + chunk_size, width)

                    for y in range(0, height, chunk_size):
                        # max y-pixel index in the assembled image
                        Y_range = min(y + chunk_size, height)

                        ProcessManager.submit_thread(make_tile,
                                                     x, X_range,
                                                     y, Y_range,
                                                     z,
                                                     parsed_vector,
                                                     bw)

                # Finish the current z position before starting the next
                ProcessManager.join_threads()
        finally:
            bw.close()
def load_and_store(fname, ind):
    """Load one image, resize it and store it in ``img_stack``.

    The first z-slice of channel 0 / timepoint 0 is read, resized to
    ``OPTIONS['size']`` x ``OPTIONS['size']`` with linear interpolation and
    written as float64 into ``img_stack[:, :, ind]``.

    Args:
        fname: Mapping whose ``'file'`` entry is the image to load
        ind: Index along the last axis of ``img_stack`` to fill
    """
    with ProcessManager.thread() as active_threads:
        with BioReader(fname['file'], max_workers=active_threads.count) as br:
            plane = np.squeeze(br[:, :, :1, 0, 0])

        side = OPTIONS['size']
        resized = cv2.resize(plane,
                             (side, side),
                             interpolation=cv2.INTER_LINEAR)
        img_stack[:, :, ind] = resized.astype(np.float64)
def load_and_scale(*args, **kwargs):
    """Build a sub-image, scale it and paste it into a target image.

    Args:
        *args: ``(image, x_ind, y_ind)``: the array to fill and the
            ``[start, stop]`` index pairs of the region along x and y
        **kwargs: Forwarded unchanged to ``_get_higher_res``; the
            ``slide_writer`` entry is also used to scale the sub-image
    """
    tile = _get_higher_res(**kwargs)

    with ProcessManager.thread():
        target, x_span, y_span = args[0], args[1], args[2]
        scaled = kwargs['slide_writer'].scale(tile)
        target[y_span[0]:y_span[1], x_span[0]:x_span[1]] = scaled
def label_cython(input_path: Path, output_path: Path, connectivity: int):
    """ Label the input image and write the labels back out.

    An image without any non-zero pixel is written out as an all-zero uint8
    image. An image with fewer dimensions than ``connectivity`` is skipped
    with a log message.

    Args:
        input_path: Path to input image.
        output_path: Path for output image.
        connectivity: Connectivity kind.

    Returns:
        True when the labeling algorithm ran and its result was written,
        None when the image was empty or was skipped.
    """
    with ProcessManager.thread() as active_threads:
        n_workers = active_threads.count

        with BioReader(input_path, max_workers=n_workers) as reader:
            with BioWriter(output_path,
                           max_workers=n_workers,
                           metadata=reader.metadata) as writer:

                # Load an image
                pixels = numpy.squeeze(reader[..., 0, 0])

                # Nothing to label: emit an all-zero image
                if not numpy.any(pixels):
                    writer.dtype = numpy.uint8
                    writer[:] = numpy.zeros_like(pixels, dtype=numpy.uint8)
                    return

                # Convert to binary
                mask = (pixels > 0)

                if connectivity > mask.ndim:
                    ProcessManager.log(
                        f'{input_path.name}: Connectivity is not less than or equal to the number of image dimensions, '
                        f'skipping this image. connectivity={connectivity}, ndim={mask.ndim}'
                    )
                    return

                # Run the labeling algorithm
                labels = ftl.label_nd(mask, connectivity)

                # Save the image
                writer.dtype = labels.dtype
                writer[:] = labels

    return True
def image_to_zarr(inp_image: Path, out_dir: Path) -> None:
    """Convert one image into one output per channel and timepoint.

    The output name is the input name with its extension (the last one or
    two suffixes shorter than five characters) replaced by ``FILE_EXT``.
    ``_c<index>`` and/or ``_t<index>`` are inserted before ``FILE_EXT`` when
    the image has more than one channel / timepoint. When no suffix
    qualifies as an extension, ``FILE_EXT`` is appended instead.

    Data is copied in ``TILE_SIZE`` x ``TILE_SIZE`` tiles, one z-slice at a
    time.

    Args:
        inp_image: Path to the input image
        out_dir: Directory receiving the converted images
    """
    with ProcessManager.process():

        with BioReader(inp_image) as br:

            # The base output name does not depend on channel or timepoint
            extension = "".join(
                suffix for suffix in inp_image.suffixes[-2:]
                if len(suffix) < 5
            )
            if extension:
                base_name = inp_image.name.replace(extension, FILE_EXT)
            else:
                # str.replace("", x) would insert x between every character
                base_name = inp_image.name + FILE_EXT

            # Loop through timepoints
            for t in range(br.T):

                # Loop through channels
                for c in range(br.C):

                    out_name = base_name
                    if br.C > 1:
                        out_name = out_name.replace(FILE_EXT, f"_c{c}" + FILE_EXT)
                    if br.T > 1:
                        out_name = out_name.replace(FILE_EXT, f"_t{t}" + FILE_EXT)
                    out_path = out_dir.joinpath(out_name)

                    with BioWriter(
                        out_path,
                        max_workers=ProcessManager._active_threads,
                        metadata=br.metadata,
                    ) as bw:

                        # Each output holds a single channel and timepoint
                        bw.C = 1
                        bw.T = 1
                        bw.channel_names = [br.channel_names[c]]

                        # Loop through z-slices
                        for z in range(br.Z):

                            # Loop across the length of the image
                            for y in range(0, br.Y, TILE_SIZE):
                                y_max = min(br.Y, y + TILE_SIZE)

                                # Pick up the current thread allowance
                                bw.max_workers = ProcessManager._active_threads
                                br.max_workers = ProcessManager._active_threads

                                # Loop across the width of the image
                                for x in range(0, br.X, TILE_SIZE):
                                    x_max = min(br.X, x + TILE_SIZE)

                                    bw[y:y_max, x:x_max, z:z + 1, 0, 0] = \
                                        br[y:y_max, x:x_max, z:z + 1, c, t]
def main(imgPath: pathlib.Path,
         stitchPath: pathlib.Path,
         outDir: pathlib.Path,
         timesliceNaming: typing.Optional[bool]
         ) -> None:
    """Set up stitching and run one assembly process per stitching vector.

    A file pattern is inferred from the files in ``imgPath`` and stored in
    the module-level ``fp``. When inference fails, ``'.*'`` is used. Only
    files in ``stitchPath`` whose name contains ``img-global-positions`` are
    treated as stitching vectors.

    Args:
        imgPath: Directory containing the images to stitch
        stitchPath: Directory containing the stitching vectors
        outDir: Directory receiving the assembled images
        timesliceNaming: Not read inside this function
    """
    # Get a sorted list of stitching vectors
    vectors = sorted(stitchPath.iterdir())

    # Try to infer a filepattern from the files on disk for faster matching later
    global fp  # make the filepattern global to share between processes
    try:
        pattern = filepattern.infer_pattern([f.name for f in imgPath.iterdir()])
        logger.info(f'Inferred file pattern: {pattern}')
        fp = filepattern.FilePattern(imgPath, pattern)

    # Pattern inference didn't work, so just get a list of files.
    # ``Exception`` rather than a bare except, so KeyboardInterrupt and
    # SystemExit are not swallowed.
    except Exception:
        logger.info('Unable to infer pattern, defaulting to: .*')
        fp = filepattern.FilePattern(imgPath, '.*')

    # Run stitching jobs in separate processes
    ProcessManager.init_processes('main', 'asmbl')

    for v in vectors:
        # Check to see if the file is a valid stitching vector
        if 'img-global-positions' not in v.name:
            continue

        ProcessManager.submit_process(assemble_image, v, outDir)

    ProcessManager.join_processes()
def main(input_dir: pathlib.Path,
         output_dir: pathlib.Path,
         file_pattern: typing.Optional[str] = None,
         group_by: typing.Optional[str] = None,
         get_darkfield: typing.Optional[bool] = None,
         get_photobleach: typing.Optional[bool] = None,
         metadata_dir: pathlib.Path = None) -> None:
    """Submit one ``basic.basic`` process per group of input files.

    Args:
        input_dir: Directory containing the input images
        output_dir: Directory forwarded to ``basic.basic`` for image outputs
        file_pattern: Pattern used to parse file names; ``'.*'`` when None
        group_by: Variables to group files by; ``'xyp'`` when None
        get_darkfield: Forwarded to ``basic.basic``; False when None
        get_photobleach: Forwarded to ``basic.basic``; False when None
        metadata_dir: Directory forwarded to ``basic.basic`` for metadata
    """
    if group_by is None:
        group_by = 'xyp'

    if get_darkfield is None:
        get_darkfield = False

    if get_photobleach is None:
        get_photobleach = False

    # The default must land in ``file_pattern``, the name that is used below
    if file_pattern is None:
        file_pattern = '.*'

    fp = FilePattern(input_dir, file_pattern)

    ProcessManager.init_processes("basic")

    for files in fp(group_by=group_by):
        ProcessManager.submit_process(basic.basic,
                                      files,
                                      output_dir,
                                      metadata_dir,
                                      get_darkfield,
                                      get_photobleach)

    ProcessManager.join_processes()
def unshade_batch(files: typing.List[Path],
                  out_dir: Path,
                  brightfield: Path,
                  darkfield: Path,
                  photobleach: typing.Optional[Path] = None):
    """Flatfield-correct a batch of images that share correction images.

    The brightfield and darkfield images are loaded once, then one
    ``unshade_image`` thread is submitted per entry of ``files``.

    When ``photobleach`` is given it is read as a CSV whose first column is
    a key and whose second column is a float; rows whose first column is
    ``file`` are skipped. The per-file value and the mean of all values are
    passed to ``unshade_image``.

    Args:
        files: Entries whose ``'file'`` item is the image to correct
        out_dir: Directory receiving the corrected images
        brightfield: Path to the brightfield image
        darkfield: Path to the darkfield image
        photobleach: Optional path to the photobleach CSV
    """
    if photobleach is not None:
        with open(photobleach, 'r') as f:
            reader = csv.reader(f)
            photo_offset = {
                line[0]: float(line[1])
                for line in reader if line[0] != 'file'
            }
        offset = np.mean(list(photo_offset.values()))
    else:
        offset = None

    with ProcessManager.process():

        with BioReader(brightfield, max_workers=2) as bf:
            brightfield_image = bf[:, :, :, 0, 0].squeeze()

        with BioReader(darkfield, max_workers=2) as df:
            darkfield_image = df[:, :, :, 0, 0].squeeze()

        for file in files:
            if photobleach is not None:
                pb = photo_offset[file['file']]
            else:
                pb = None

            ProcessManager.submit_thread(unshade_image,
                                         file['file'],
                                         out_dir,
                                         brightfield_image,
                                         darkfield_image,
                                         pb,
                                         offset)

        ProcessManager.join_threads()
def _first_matching_file(pattern, match_keys):
    """Return the ``'file'`` of the first match in ``pattern``, or None."""
    matches = pattern.get_matching(**match_keys)
    if not matches:
        return None
    return matches[0]['file']


def main(imgDir: Path,
         imgPattern: str,
         ffDir: Path,
         brightPattern: str,
         outDir: Path,
         darkPattern: typing.Optional[str] = None,
         photoPattern: typing.Optional[str] = None) -> None:
    """Start a process for each set of brightfield/darkfield/photobleach patterns.

    Images are grouped by the variables of ``imgPattern`` that do not appear
    in ``brightPattern``. For each group the matching flatfield file (and
    darkfield / photobleach file when those patterns are given) is looked up
    and handed to ``unshade_batch``. A group is skipped with a warning when
    a requested file cannot be found.

    Args:
        imgDir: Directory containing the images to correct
        imgPattern: File pattern of the images
        ffDir: Directory containing flatfield and darkfield images
        brightPattern: File pattern of the flatfield images
        outDir: Directory receiving the corrected images
        darkPattern: Optional file pattern of the darkfield images
        photoPattern: Optional file pattern of the photobleach files, which
            are looked up in a ``metadata`` directory next to ``ffDir``
    """
    use_dark = darkPattern is not None and darkPattern != ''
    use_photo = photoPattern is not None and photoPattern != ''

    # Create the FilePattern objects to handle file access
    ff_files = FilePattern(ffDir, brightPattern)
    fp = FilePattern(imgDir, imgPattern)
    if use_dark:
        dark_files = FilePattern(ffDir, darkPattern)
    if use_photo:
        photo_files = FilePattern(
            str(Path(ffDir).parents[0].joinpath('metadata').absolute()),
            photoPattern)

    group_by = [v for v in fp.variables if v not in ff_files.variables]
    GROUPED = group_by + ['file']

    ProcessManager.init_processes('main', 'unshade')

    for files in fp(group_by=group_by):

        # Variable values shared by every file of the group
        match_keys = {
            k.upper(): v for k, v in files[0].items() if k not in GROUPED
        }

        flat_path = _first_matching_file(ff_files, match_keys)
        if flat_path is None:
            logger.warning("Could not find a flatfield image, skipping...")
            continue

        # Always bind the optional paths so the submit call below cannot
        # fail on an unbound name when a pattern was not supplied
        dark_path = None
        photo_path = None

        if use_dark:
            dark_path = _first_matching_file(dark_files, match_keys)
            if dark_path is None:
                logger.warning("Could not find a darkfield image, skipping...")
                continue

        if use_photo:
            photo_path = _first_matching_file(photo_files, match_keys)
            if photo_path is None:
                logger.warning(
                    "Could not find a photobleach file, skipping...")
                continue

        ProcessManager.submit_process(unshade_batch,
                                      files,
                                      outDir,
                                      flat_path,
                                      dark_path,
                                      photo_path)

    ProcessManager.join_processes()
def label_thread(input_path, output_path, connectivity):
    """Label one image and write the labels to ``output_path``.

    The image is binarized (``> 0``) before labeling. An image with fewer
    dimensions than ``connectivity`` is skipped with a log message.

    Args:
        input_path: Path to the input image
        output_path: Path of the output image
        connectivity: Connectivity passed to ``ftl.label_nd``
    """
    with ProcessManager.thread() as active_threads:
        n_workers = active_threads.count

        with bfio.BioReader(input_path, max_workers=n_workers) as br:
            with bfio.BioWriter(output_path,
                                max_workers=n_workers,
                                metadata=br.metadata) as bw:

                # Load an image and convert to binary
                mask = (br[..., 0, 0] > 0).squeeze()

                if connectivity > mask.ndim:
                    ProcessManager.log(
                        f"{input_path.name}: Connectivity is not less than or equal to the number of image dimensions, "
                        f"skipping this image. connectivity={connectivity}, ndim={mask.ndim}")
                    return

                # Run the labeling algorithm
                labels = ftl.label_nd(mask.squeeze(), connectivity)

                # Save the image
                bw.dtype = labels.dtype
                bw[:] = labels
def main(
    inpDir: Path,
    outDir: Path,
) -> None:
    """Submit one ``image_to_zarr`` process per entry of ``inpDir``.

    Args:
        inpDir: Directory whose entries are converted
        outDir: Directory forwarded to ``image_to_zarr``
    """
    ProcessManager.init_processes("main", "zarr")

    for image_path in inpDir.iterdir():
        ProcessManager.submit_process(image_to_zarr, image_path, outDir)

    # Block until every conversion has finished
    ProcessManager.join_processes()
def main(
    inpDir: Path,
    filePattern: str,
    outDir: Path,
) -> None:
    """Submit one ``image_to_zarr`` process per file matching ``filePattern``.

    Args:
        inpDir: Directory containing the input images
        filePattern: Pattern selecting the files to convert
        outDir: Directory forwarded to ``image_to_zarr``
    """
    ProcessManager.init_processes("main", "zarr")

    pattern = FilePattern(inpDir, filePattern)

    # Flatten the groups yielded by the pattern into individual entries
    for entry in (item for group in pattern() for item in group):
        ProcessManager.submit_process(image_to_zarr, entry["file"], outDir)

    ProcessManager.join_processes()
def main(input_dir: Path,
         output_dir: Path
         ) -> None:
    """Submit one ``extract_fovs`` process per ``.czi`` file in ``input_dir``.

    Args:
        input_dir: Directory searched (non-recursively) for ``.czi`` files
        output_dir: Directory forwarded to ``extract_fovs``

    Raises:
        ValueError: If ``input_dir`` contains no ``.czi`` file
    """
    logger.info('Extracting tiffs and saving as ome.tif...')

    czi_files = list(
        filter(lambda path: path.suffix == '.czi', Path(input_dir).iterdir()))

    if not czi_files:
        message = 'No CZI files found.'
        logger.error(message)
        raise ValueError(message)

    ProcessManager.init_processes()

    for czi_file in czi_files:
        ProcessManager.submit_process(extract_fovs, czi_file, output_dir)

    ProcessManager.join_processes()
def max_min_projection(br, bw, x_range, y_range, **kwargs):
    """ Calculate the max or min intensity projection of one x/y section

    The section is reduced over z in blocks of ``tile_size_z`` slices. The
    per-block results are stacked along axis 2 and reduced once more, and
    the result is written to ``bw``. Nothing is returned.

    Args:
        br (BioReader object): input file object
        bw (BioWriter object): output file object
        x_range (tuple): x-range of the img to be processed
        y_range (tuple): y-range of the img to be processed
        **kwargs: ``method`` is the reduction, called as
            ``method(array, axis=2)``; ``np.max`` is used when it is absent
    """
    with ProcessManager.thread():
        br.max_workers = ProcessManager._active_threads
        bw.max_workers = ProcessManager._active_threads

        # set projection method
        method = kwargs['method'] if 'method' in kwargs else np.max

        # x,y range of the volume
        x_lo, x_hi = x_range
        y_lo, y_hi = y_range

        # iterate over depth
        for block_index, z_lo in enumerate(range(0, br.Z, tile_size_z)):
            z_hi = min(br.Z, z_lo + tile_size_z)
            partial = method(br[y_lo:y_hi, x_lo:x_hi, z_lo:z_hi, 0, 0], axis=2)

            if block_index == 0:
                out_image = partial
            else:
                out_image = np.dstack((out_image, partial))

        # output image
        bw[y_lo:y_hi, x_lo:x_hi, 0:1, 0, 0] = method(out_image, axis=2)
_connectivity = int(args.connectivity) logger.info(f'connectivity = {_connectivity}') _input_dir = Path(args.inpDir).resolve() assert _input_dir.exists(), f'{_input_dir } does not exist.' if _input_dir.joinpath('images').is_dir(): _input_dir = _input_dir.joinpath('images') logger.info(f'inpDir = {_input_dir}') _output_dir = Path(args.outDir).resolve() assert _output_dir.exists(), f'{_output_dir } does not exist.' logger.info(f'outDir = {_output_dir}') # We only need a thread manager since labeling and image reading/writing # release the gil ProcessManager.init_threads() # Get all file names in inpDir image collection _files = list( filter( lambda _file: _file.is_file() and _file.name.endswith('.ome.tif'), _input_dir.iterdir())) _small_files, _large_files = filter_by_size(_files, 500) logger.info(f'processing {len(_files)} images in total...') logger.info(f'processing {len(_small_files)} small images with cython...') logger.info(f'processing {len(_large_files)} large images with rust') if _small_files: for _infile in _small_files: ProcessManager.submit_thread(
def make_tile(x_min: int,
              x_max: int,
              y_min: int,
              y_max: int,
              z: int,
              parsed_vector: dict,
              bw: BioWriter) -> None:
    """Create a supertile from images and save to file

    A supertile is the section of the assembled image bounded by
    ``[x_min, x_max)`` and ``[y_min, y_max)``. Every image of the stitching
    vector that touches the section is read (first channel and timepoint, z
    position ``z``) and copied into an in-memory buffer, which is then
    written to the output in a single operation.

    Args:
        x_min: Minimum x bound of the tile
        x_max: Maximum x bound of the tile
        y_min: Minimum y bound of the tile
        y_max: Maximum y bound of the tile
        z: Current z position to assemble
        parsed_vector: Dictionary with a ``filePos`` list whose entries hold
            ``file``, ``posX``, ``posY``, ``width`` and ``height``
        bw: The output file object
    """
    with ProcessManager.thread() as active_threads:

        placements = parsed_vector['filePos']

        # Get the data type
        with BioReader(placements[0]['file']) as br:
            dtype = br.dtype

        # initialize the supertile
        tile_w = x_max - x_min
        tile_h = y_max - y_min
        template = numpy.zeros((tile_h, tile_w, 1, 1, 1), dtype=dtype)

        # get images in bounds of current super tile
        for f in placements:

            # check that image is within the x-tile bounds
            left = f['posX']
            right = f['posX'] + f['width']
            touches_x = (x_min <= left <= x_max) \
                or (x_min <= right <= x_max) \
                or (left <= x_min and right >= x_max)
            if not touches_x:
                continue

            # check that image is within the y-tile bounds
            top = f['posY']
            bottom = f['posY'] + f['height']
            touches_y = (y_min <= top <= y_max) \
                or (y_min <= bottom <= y_max) \
                or (top <= y_min and bottom >= y_max)
            if not touches_y:
                continue

            # get bounds of image within the tile
            Xt = [max(0, left - x_min), min(tile_w, right - x_min)]
            Yt = [max(0, top - y_min), min(tile_h, bottom - y_min)]

            # get bounds of image within the image
            Xi = [max(0, x_min - left), min(f['width'], x_max - left)]
            Yi = [max(0, y_min - top), min(f['height'], y_max - top)]

            # Load the image (only the first c,t layer)
            with BioReader(f['file'], max_workers=active_threads.count) as br:
                image = br[Yi[0]:Yi[1], Xi[0]:Xi[1], z:z+1, 0, 0]

            # Put the image in the buffer
            template[Yt[0]:Yt[1], Xt[0]:Xt[1], ...] = image

        # Save the image
        bw.max_workers = ProcessManager._active_threads
        bw[y_min:y_max, x_min:x_max, z:z+1, 0, 0] = template
def _parse_stitch(stitchPath: pathlib.Path, timepointName: bool = False) -> dict:
    """ Load and parse image stitching vectors

    This function parses the data from a stitching vector, then extracts the
    relevant image sizes for each image in the stitching vector to obtain a
    stitched image size. This function also infers an output file name and
    registers it as the ProcessManager job name.

    Args:
        stitchPath: A path to stitching vectors
        timepointName: Use the vector timeslice as the image name

    Returns:
        Dictionary with keys (width, height, name, filePos)
    """
    # Initialize the output
    out_dict = {'width': int(0),
                'height': int(0),
                'name': '',
                'filePos': []}

    # Try to parse the stitching vector using the inferred file pattern
    if fp.pattern != '.*':
        vp = filepattern.VectorPattern(stitchPath, fp.pattern)
        unique_vals = {k.upper(): v for k, v in vp.uniques.items() if len(v) == 1}
        files = fp.get_matching(**unique_vals)

    else:
        # Try to infer a pattern from the stitching vector
        try:
            vector_files = filepattern.VectorPattern(stitchPath, '.*')
            pattern = filepattern.infer_pattern([v[0]['file'] for v in vector_files()])
            vp = filepattern.VectorPattern(stitchPath, pattern)

        # Fall back to universal filepattern
        except ValueError:
            vp = filepattern.VectorPattern(stitchPath, '.*')

        files = fp.files

    # A set gives constant-time membership tests in the loop below
    file_names = {f['file'].name for f in files}

    for file in vp():

        if file[0]['file'] not in file_names:
            continue

        stitch_groups = {k: get_number(v) for k, v in file[0].items()}
        stitch_groups['file'] = files[0]['file'].with_name(stitch_groups['file'])

        # Get the image size
        stitch_groups['width'], stitch_groups['height'] = BioReader.image_size(stitch_groups['file'])

        # Set the stitching vector values in the file dictionary
        out_dict['filePos'].append(stitch_groups)

    # Calculate the output image dimensions
    out_dict['width'] = max(f['width'] + f['posX'] for f in out_dict['filePos'])
    out_dict['height'] = max(f['height'] + f['posY'] for f in out_dict['filePos'])

    # Generate the output file name
    if timepointName:
        global_regex = ".*global-positions-([0-9]+).txt"
        name = re.match(global_regex, pathlib.Path(stitchPath).name).groups()[0]
        name += '.ome.tif'
        out_dict['name'] = name
        ProcessManager.job_name(out_dict['name'])
        ProcessManager.log('Setting output name to timepoint slice number.')
    else:
        # Try to infer a good filename
        try:
            out_dict['name'] = vp.output_name()
            ProcessManager.job_name(out_dict['name'])
            ProcessManager.log('Inferred output file name from vector.')

        # A file name couldn't be inferred, default to the first image name.
        # ``Exception`` rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not swallowed.
        except Exception:
            for file in vp():
                out_dict['name'] = file[0]['file']
                break
            # Register the job name only after the fallback name is known
            ProcessManager.job_name(out_dict['name'])
            ProcessManager.log('Could not infer output file name from vector, using first file name in the stitching vector as an output file name.')

    return out_dict
def extract_fovs(file_path: Path, out_path: Path):
    """ Extract individual FOVs from a czi file

    When CZI files are loaded by BioFormats, it will generally try to mosaic
    images together by stage position if the image was captured with the
    intention of mosaicing images together. At the time this function was
    written, there was no clear way of extracting individual FOVs so this
    algorithm was created.

    Every field of view of scene 0 (or of a file without a scene dimension)
    in each z-slice, channel, and timepoint contained in a CZI file is saved
    as an individual image.

    Args:
        file_path (Path): Path to CZI file
        out_path (Path): Path to output directory

    Raises:
        TypeError: If ``file_path`` does not have a ``.czi`` suffix
    """
    with ProcessManager.process(file_path.name):

        logger.info('Starting extraction from ' + str(file_path) + '...')

        if Path(file_path).suffix != '.czi':
            raise TypeError("Path must be to a czi file.")

        base_name = Path(file_path.name).stem

        # Load files without mosaicing
        czi = czifile.CziFile(file_path, detectmosaic=False)
        subblocks = [s for s in czi.filtered_subblock_directory
                     if s.mosaic_index is not None]

        ind = {'X': [], 'Y': [], 'Z': [], 'C': [], 'T': [], 'Row': [], 'Col': []}

        # Get the indices of each FOV. ``kept`` stays aligned with the index
        # lists even when subblocks of other scenes are skipped.
        kept = []
        for s in subblocks:
            scene = [dim.start for dim in s.dimension_entries if dim.dimension == 'S']
            if scene and scene[0] != 0:
                continue

            kept.append(s)
            for dim in s.dimension_entries:
                if dim.dimension in ('X', 'Y', 'Z', 'C', 'T'):
                    ind[dim.dimension].append(dim.start)

        # Map stage positions to row/column numbers (np.unique sorts)
        row_conv = {y: row for row, y in enumerate(np.unique(ind['Y']))}
        col_conv = {x: col for col, x in enumerate(np.unique(ind['X']))}

        ind['Row'] = [row_conv[y] for y in ind['Y']]
        ind['Col'] = [col_conv[x] for x in ind['X']]

        with BioReader(file_path) as br:

            metadata = br.metadata
            chan_names = br.cnames

            for i, s in enumerate(kept):
                Z = ind['Z'][i] if ind['Z'] else None
                C = ind['C'][i] if ind['C'] else None
                T = ind['T'][i] if ind['T'] else None

                out_file_path = out_path.joinpath(
                    _get_image_name(base_name,
                                    row=ind['Row'][i],
                                    col=ind['Col'][i],
                                    Z=Z,
                                    C=C,
                                    T=T))

                dims = [_get_image_dim(s, 'Y'),
                        _get_image_dim(s, 'X'),
                        _get_image_dim(s, 'Z'),
                        _get_image_dim(s, 'C'),
                        _get_image_dim(s, 'T')]

                data = s.data_segment().data().reshape(dims)

                # Without a channel dimension, use the first channel name
                chan_index = 0 if C is None else C

                write_thread(out_file_path,
                             data,
                             metadata,
                             chan_names[chan_index])
dest='outDir', type=str, help='Output collection', required=True) # Parse the arguments args = parser.parse_args() connectivity = int(args.connectivity) logger.info('connectivity = {}'.format(connectivity)) inpDir = Path(args.inpDir) logger.info('inpDir = {}'.format(inpDir)) outDir = Path(args.outDir) logger.info('outDir = {}'.format(outDir)) # We only need a thread manager since labeling and image reading/writing # release the gil ProcessManager.init_threads() # Get all file names in inpDir image collection files = [ f for f in inpDir.iterdir() if f.is_file() and f.name.endswith('.ome.tif') ] for file in files: ProcessManager.submit_thread(label_thread, file, outDir.joinpath(file.name), connectivity) ProcessManager.join_threads()
type=str, help='Output collection', required=True) # Parse the arguments args = parser.parse_args() inpDir = args.inpDir if (Path.is_dir(Path(args.inpDir).joinpath('images'))): # switch to images folder if present fpath = str(Path(args.inpDir).joinpath('images').absolute()) logger.info('inpDir = {}'.format(inpDir)) projectionType = args.projectionType logger.info('projectionType = {}'.format(projectionType)) outDir = args.outDir logger.info('outDir = {}'.format(outDir)) # initialize projection function if projectionType == 'max': projection = max_min_projection method = np.max elif projectionType == 'min': projection = max_min_projection method = np.min elif projectionType == 'mean': projection = mean_projection method = None ProcessManager.init_processes('main', 'intensity') main(inpDir, outDir, projection, method)
def basic(files: typing.List[Path],
          out_dir: Path,
          metadata_dir: typing.Optional[Path] = None,
          darkfield: bool = False,
          photobleach: bool = False):
    """Estimate flatfield (and optionally darkfield/photobleach) components.

    The images are loaded as a resized stack and the estimate is refined
    for up to ``max_reweight_iterations`` reweighting iterations. The
    flatfield is then resized and written to ``out_dir``. A darkfield image
    and a photobleach CSV are written as well when requested.

    Args:
        files: Entries whose ``'file'`` item is the path of an input image
        out_dir: Directory receiving the flatfield/darkfield images
        metadata_dir: Directory receiving the photobleach CSV; it is
            dereferenced whenever ``photobleach`` is True
        darkfield: Whether a darkfield estimate is requested
        photobleach: Whether photobleach offsets are computed and exported
    """
    # Try to infer a filename
    try:
        pattern = infer_pattern([f['file'].name for f in files])
        fp = FilePattern(files[0]['file'].parent, pattern)
        base_output = fp.output_name()

    # Fallback to the first filename. ``Exception`` rather than a bare
    # except, so KeyboardInterrupt and SystemExit are not swallowed.
    except Exception:
        base_output = files[0]['file'].name

    extension = ''.join(files[0]['file'].suffixes)

    with ProcessManager.process(base_output):

        # Load files and sort
        ProcessManager.log('Loading and sorting images...')
        img_stk, X, Y = _get_resized_image_stack(files)
        img_stk_sort = np.sort(img_stk)

        # Initialize options
        new_options = _initialize_options(img_stk_sort, darkfield, OPTIONS)

        # Initialize flatfield/darkfield matrices
        ProcessManager.log('Beginning flatfield estimation')
        flatfield_old = np.ones((new_options['size'], new_options['size']), dtype=np.float64)
        darkfield_old = np.random.normal(size=(new_options['size'], new_options['size'])).astype(np.float64)

        # Optimize until the change in values is below tolerance or a maximum number of iterations is reached
        for w in range(new_options['max_reweight_iterations']):

            # Optimize using the inexact augmented Lagrangian multiplier method with L1 loss
            A, E1, A_offset = _inexact_alm_l1(copy.deepcopy(img_stk_sort), new_options)

            # Calculate the flatfield/darkfield images and update training weights
            flatfield, darkfield, new_options = _get_flatfield_and_reweight(A, E1, A_offset, new_options)

            # Calculate the change in flatfield and darkfield images between iterations
            mad_flat = np.sum(np.abs(flatfield - flatfield_old)) / np.sum(np.abs(flatfield_old))
            temp_diff = np.sum(np.abs(darkfield - darkfield_old))
            if temp_diff < 10**-7:
                mad_dark = 0
            else:
                mad_dark = temp_diff / np.max(np.sum(np.abs(darkfield_old)), initial=10**-6)
            flatfield_old = flatfield
            darkfield_old = darkfield

            # Stop optimizing if the change in flatfield/darkfield is below threshold
            ProcessManager.log('Iteration {} loss: {}'.format(w + 1, mad_flat))
            if np.max(mad_flat, initial=mad_dark) < new_options['reweight_tol']:
                break

        # Calculate photobleaching effects if specified
        if photobleach:
            pb = _get_photobleach(copy.deepcopy(img_stk), flatfield, darkfield)

        # Resize images back to original image size
        ProcessManager.log('Saving outputs...')
        flatfield = cv2.resize(flatfield, (Y, X), interpolation=cv2.INTER_CUBIC).astype(np.float32)
        if new_options['darkfield']:
            darkfield = cv2.resize(darkfield, (Y, X), interpolation=cv2.INTER_CUBIC).astype(np.float32)

        # Export the flatfield image as a tiled tiff
        flatfield_out = base_output.replace(extension, '_flatfield' + extension)

        with BioReader(files[0]['file'], max_workers=2) as br:
            metadata = br.metadata

        with BioWriter(out_dir.joinpath(flatfield_out), metadata=metadata, max_workers=2) as bw:
            bw.dtype = np.float32
            bw.x = X
            bw.y = Y
            bw[:] = np.reshape(flatfield, (Y, X, 1, 1, 1))

        # Export the darkfield image as a tiled tiff
        if new_options['darkfield']:
            darkfield_out = base_output.replace(extension, '_darkfield' + extension)
            with BioWriter(out_dir.joinpath(darkfield_out), metadata=metadata, max_workers=2) as bw:
                bw.dtype = np.float32
                bw.x = X
                bw.y = Y
                bw[:] = np.reshape(darkfield, (Y, X, 1, 1, 1))

        # Export the photobleaching components as csv
        if photobleach:
            offsets_out = base_output.replace(extension, '_offsets.csv')
            # NOTE(review): metadata_dir defaults to None, which would fail
            # on the next line; confirm callers always supply it here.
            with open(metadata_dir.joinpath(offsets_out), 'w') as fw:
                fw.write('file,offset\n')
                # NOTE(review): ``f`` is a whole entry of ``files`` (indexed
                # with 'file' above), not a file name; confirm this is the
                # intended content of the first CSV column.
                for f, o in zip(files, pb[0, :].tolist()):
                    fw.write("{},{}\n".format(f, o))
def main(input_dir: pathlib.Path,
         pyramid_type: str,
         image_type: str,
         file_pattern: str,
         output_dir: pathlib.Path):
    """Build image pyramids for every image matching ``file_pattern``.

    One ``write_slide`` process is submitted per output slice. Images are
    grouped by ``z`` for Neuroglancer, ``c`` for Zarr and ``t`` for DeepZoom
    when that variable is present in the file pattern; otherwise one
    pyramid is created per image.

    Args:
        input_dir: Directory containing the input images
        pyramid_type: Key into ``PyramidWriter`` (``'Neuroglancer'``,
            ``'Zarr'`` or ``'DeepZoom'`` are handled here)
        image_type: Forwarded to the writer; ``'segmentation'`` makes
            Neuroglancer/Zarr outputs wait for all processes before their
            info is written
        file_pattern: Pattern used to parse the file names
        output_dir: Directory receiving the pyramids
    """
    # Set ProcessManager config and initialize
    ProcessManager.num_processes(multiprocessing.cpu_count())
    ProcessManager.num_threads(2 * ProcessManager.num_processes())
    ProcessManager.threads_per_request(1)
    ProcessManager.init_processes('pyr')
    logger.info('max concurrent processes = %s', ProcessManager.num_processes())

    # Parse the input file directory
    fp = filepattern.FilePattern(input_dir, file_pattern)
    group_by = ''
    if 'z' in fp.variables and pyramid_type == 'Neuroglancer':
        group_by += 'z'
        logger.info(
            'Stacking images by z-dimension for Neuroglancer precomputed format.'
        )
    elif 'c' in fp.variables and pyramid_type == 'Zarr':
        group_by += 'c'
        logger.info('Stacking channels by c-dimension for Zarr format')
    elif 't' in fp.variables and pyramid_type == 'DeepZoom':
        group_by += 't'
        logger.info('Creating time slices by t-dimension for DeepZoom format.')
    else:
        logger.info(
            f'Creating one pyramid for each image in {pyramid_type} format.')

    depth = 0
    depth_max = 0
    image_dir = ''

    for files in fp(group_by=group_by):

        # Create the output name for Neuroglancer format
        if pyramid_type in ['Neuroglancer', 'Zarr']:
            try:
                image_dir = fp.output_name(list(files))
            except Exception:
                # Best effort: keep the previous name, the check below
                # falls back to the first file name when it is unusable
                pass

            if image_dir in ['', '.*']:
                image_dir = files[0]['file'].name

            # Reset the depth
            depth = 0
            depth_max = 0

        for file in files:

            with bfio.BioReader(file['file'], max_workers=1) as br:
                if pyramid_type == 'Zarr':
                    d_z = br.c
                else:
                    d_z = br.z

            depth_max += d_z

            for z in range(d_z):

                pyramid_args = {
                    'base_dir': output_dir.joinpath(image_dir),
                    'image_path': file['file'],
                    'image_depth': z,
                    'output_depth': depth,
                    'max_output_depth': depth_max,
                    'image_type': image_type
                }

                pw = PyramidWriter[pyramid_type](**pyramid_args)

                ProcessManager.submit_process(pw.write_slide)

                depth += 1

                if pyramid_type == 'DeepZoom':
                    pw.write_info()

        if pyramid_type in ['Neuroglancer', 'Zarr']:
            if image_type == 'segmentation':
                ProcessManager.join_processes()
            pw.write_info()

    ProcessManager.join_processes()
def _get_higher_res(S: int,
                    slide_writer: PyramidWriter,
                    X: typing.Tuple[int,int] = None,
                    Y: typing.Tuple[int,int] = None,
                    Z: typing.Tuple[int,int] = (0,1)):
    r""" Recursive function for pyramid building

    This is a recursive function that builds an image pyramid by indicating
    an original region of an image at a given scale. This function then
    builds a pyramid up from the highest resolution components of the pyramid
    (the original images) to the given position resolution.

    As an example, imagine the following possible pyramid:

    Scale S=0                     1234
                                 /    \
    Scale S=1                  12      34
                              /  \    /  \
    Scale S=2                1    2  3    4

    At scale 2 (the highest resolution) there are 4 original images. At scale
    1, images are averaged and concatenated into one image (i.e. image 12).
    Calling this function using S=0 will attempt to generate 1234 by calling
    this function again to get image 12, which will then call this function
    again to get image 1 and then image 2. Note that this function actually
    builds images in quadrants (top left and right, bottom left and right)
    rather than two sections as displayed above.

    Due to the nature of how this function works, it is possible to build a
    pyramid in parallel, since building the subpyramid under image 12 can be
    run independently of the building of subpyramid under 34.

    Args:
        S: Top level scale from which the pyramid will be built
        slide_writer: object used to encode and write pyramid tiles
        X: Range of X values [min,max] to get at the indicated scale; the
            full width at scale S when None. The argument is not modified.
        Y: Range of Y values [min,max] to get at the indicated scale; the
            full height at scale S when None. The argument is not modified.
        Z: Range of Z values [min,max] read from the input image

    Returns:
        image: The image corresponding to the X,Y values at scale S

    Raises:
        Exception: Any exception raised while building a sub-image is
            re-raised here instead of being dropped by the executor.
    """
    # Get the scale info
    scale_info = slide_writer.scale_info(S)

    # Work on private lists so tuples are accepted and the caller's
    # sequences are never mutated by the clamping below
    X = [0, scale_info['size'][0]] if X is None else list(X)
    Y = [0, scale_info['size'][1]] if Y is None else list(Y)

    # Modify upper bound to stay within resolution dimensions
    if X[1] > scale_info['size'][0]:
        X[1] = scale_info['size'][0]
    if Y[1] > scale_info['size'][1]:
        Y[1] = scale_info['size'][1]

    if str(S) == slide_writer.scale_info(-1)['key']:
        # Highest resolution: read the pixels straight from the input image
        with ProcessManager.thread():

            with bfio.BioReader(slide_writer.image_path, max_workers=1) as br:
                image = br[Y[0]:Y[1], X[0]:X[1], Z[0]:Z[1], ...].squeeze()

            # Write the chunk
            slide_writer.store_chunk(image, str(S), (X[0], X[1], Y[0], Y[1]))

        return image

    else:
        # Initialize the output
        image = np.zeros((Y[1]-Y[0], X[1]-X[0]), dtype=slide_writer.dtype)

        # Set the subgrid dimensions
        subgrid_dims = [[2*X[0], 2*X[1]], [2*Y[0], 2*Y[1]]]
        for dim in subgrid_dims:
            while dim[1]-dim[0] > CHUNK_SIZE:
                dim.insert(1, dim[0] + ((dim[1] - dim[0]-1)//CHUNK_SIZE) * CHUNK_SIZE)

        def load_and_scale(*args, **kwargs):
            # Build the sub-image one scale down, then scale it and paste it
            # into the region (x_ind, y_ind) of the target image
            sub_image = _get_higher_res(**kwargs)

            with ProcessManager.thread():
                image = args[0]
                x_ind = args[1]
                y_ind = args[2]
                image[y_ind[0]:y_ind[1], x_ind[0]:x_ind[1]] = kwargs['slide_writer'].scale(sub_image)

        pending = []
        with ThreadPoolExecutor(1) as executor:
            for y in range(0, len(subgrid_dims[1])-1):
                y_ind = [subgrid_dims[1][y] - subgrid_dims[1][0],
                         subgrid_dims[1][y+1] - subgrid_dims[1][0]]
                y_ind = [np.ceil(yi/2).astype('int') for yi in y_ind]

                for x in range(0, len(subgrid_dims[0])-1):
                    x_ind = [subgrid_dims[0][x] - subgrid_dims[0][0],
                             subgrid_dims[0][x+1] - subgrid_dims[0][0]]
                    x_ind = [np.ceil(xi/2).astype('int') for xi in x_ind]

                    pending.append(
                        executor.submit(load_and_scale,
                                        image, x_ind, y_ind,           # args
                                        X=subgrid_dims[0][x:x+2],      # kwargs
                                        Y=subgrid_dims[1][y:y+2],
                                        Z=Z,
                                        S=S+1,
                                        slide_writer=slide_writer))

        # The executor has finished; surface any exception from a worker
        for future in pending:
            future.result()

        # Write the chunk
        slide_writer.store_chunk(image, str(S), (X[0], X[1], Y[0], Y[1]))
        return image