def make_qc_images(lama_specimen_dir: Path, target: Path, outdir: Path):
    """
    Generate mid-slice images for quick QC of the registration process.

    Parameters
    ----------
    lama_specimen_dir
        The registration outdir. Should contain an 'output' folder
    target
        The target image to display in cyan
    outdir
        Where to put the qc images. The per-type sub folders are created inside it
        (without ``parents=True``, so ``outdir`` itself has to exist already)

    Notes
    -----
    Make qc images from:
        The final registration stage.
            What the volumes look like after registration
        The rigidly-registered images with the inverted labels overlaid
            This is a good indicator of registration accuracy

    If setting up the specimen paths raises FileNotFoundError, the exception is logged and the function
    returns without making any images.
    """
    target = common.LoadImage(target).array

    # Make qc images for all stages of registration including any resolution images
    try:
        paths = SpecimenDataPaths(lama_specimen_dir).setup()
    except FileNotFoundError as e:
        logging.exception(f'cannot find specimen directory\n{e}')
        # Nothing can be drawn without the specimen paths. Carrying on from here used to fail
        # with an UnboundLocalError on 'paths', hiding the real problem.
        return

    # Order output dirs by qc type
    red_cyan_dir = outdir / 'red_cyan_overlays'
    greyscale_dir = outdir / 'greyscales'
    red_cyan_dir.mkdir(exist_ok=True)
    greyscale_dir.mkdir(exist_ok=True)

    for i, (stage, img_path) in enumerate(paths.registration_imgs()):
        img = common.LoadImage(img_path).array
        make_red_cyan_qc_images(target, img, red_cyan_dir, greyscale_dir, img_path.stem, i, stage)

    if paths.inverted_labels_dirs:
        # TODO: The first reg img will be the rigid-registered image if there are no resolution intermediate
        # images, which is really what we want. Otherwise it will be the first resolution image, which will
        # do for now, as they are usually very similar
        first_reg_dir = paths.reg_dirs[0]

        # If we had rigid, affine, deformable stages, we would need to overlay the rigid image ([0]) with the
        # label that finally had the inverted affine transform applied to it ([1])
        inverted_label_dir = paths.inverted_labels_dirs[1]

        inverted_label_overlays_dir = outdir / 'inverted_label_overlay'
        inverted_label_overlays_dir.mkdir(exist_ok=True)

        overlay_labels(first_reg_dir, inverted_label_dir, inverted_label_overlays_dir)
def load_mask(parent_dir: Path, mask_path: Path) -> np.ndarray:
    """
    Load the mask volume and verify that it is binary.

    The mask is shared by several data getters, so it is loaded here rather than inside any one of
    those classes.

    Parameters
    ----------
    parent_dir
        Directory that ``mask_path`` is joined onto
    mask_path
        Mask file name/path relative to ``parent_dir``

    Returns
    -------
    The mask array as produced by ``common.LoadImage``

    Raises
    ------
    ValueError
        If the set of unique values in the mask is not exactly {0, 1}. Note this also rejects a mask
        that is all zeros or all ones.
    """
    mask_file = parent_dir / mask_path
    mask = common.LoadImage(mask_file).array

    values_present = set(np.unique(mask))
    if values_present == {0, 1}:
        return mask

    message = "Mask image should contain only ones and zeros "
    logging.error(message)
    raise ValueError(message)
def memorymap_data(self, lama_root_dir: Path) -> Dict[str, np.memmap]:
    """
    Iterate over the specimens under ``lama_root_dir`` and collect memory-mapped copies of image arrays.

    NOTE(review): this method looks unfinished. ``imgpath`` is never assigned inside the function, so
    unless it is a module-level name the first iteration raises NameError. ``line_dir``, ``spec_dir``,
    ``config`` and ``reg_dir`` are computed but not used to locate any image.

    Parameters
    ----------
    lama_root_dir
        Passed to ``specimen_iterator``, which yields (line_dir, spec_dir) pairs

    Returns
    -------
    OrderedDict mapping image basename to a ``np.memmap`` holding a copy of the image array.
    Exits the process via ``sys.exit()`` if an image cannot be loaded.
    """

    imgs = OrderedDict()

    for line_dir, spec_dir in specimen_iterator(lama_root_dir):
        # NOTE(review): only the suffix is passed here; presumably the specimen directory should be
        # given as well - confirm against common.getfile_endswith
        config_file = common.getfile_endswith(
            '.toml')  # Get the Lama config from the specimen directory
        config = LamaConfig(config_file)
        reg_dir = config['root_reg_dir']  # currently unused
        basename = os.path.basename(imgpath)  # NOTE(review): imgpath is not defined in this function
        loader = common.LoadImage(imgpath)

        # A falsy loader signals a failed load; error_msg carries the reason
        if not loader:
            logging.error("Problem normalising image: {}".format(
                loader.error_msg))
            sys.exit()
        arr = loader.array
        # Back the memmap with an anonymous temp file. The file object is not closed here; the memmap
        # keeps using it for as long as the returned dict holds the array
        t = tempfile.TemporaryFile()
        m = np.memmap(t, dtype=arr.dtype, mode='w+', shape=arr.shape)
        m[:] = arr
        imgs[basename] = m
    return imgs
def overlay_labels(first_stage_reg_dir: Path, inverted_labeldir: Path, out_dir_labels: Path):
    """
    Overlay each first-stage (rigid) registered image with its corresponding inverted labels.

    For every volume found in ``first_stage_reg_dir`` the label map is looked up at
    ``inverted_labeldir/<volume stem>/<volume file name>``, so the registered volumes and the inverted
    label maps have to be named identically. One PNG per volume is written to ``out_dir_labels``, made
    from the middle slice along the last array axis.

    If either the volume or its label map fails to load, an error is logged and the function returns
    immediately, leaving any remaining volumes unprocessed. A missing label file is only logged and
    skipped.

    TODO: Add axial and coronal views.
    """
    registered_vols = common.get_file_paths(first_stage_reg_dir, ignore_folder=IGNORE_FOLDER)

    for reg_path in registered_vols:
        reg_loader = common.LoadImage(reg_path)
        if not reg_loader:
            logging.error(f'cannnot create qc image from {reg_path}')
            return

        labels_file = inverted_labeldir / reg_path.stem / reg_path.name
        if not labels_file.is_file():
            logging.info('No inverted label found. Skipping creation of inverted label-image overlay')
            continue

        labels_loader = common.LoadImage(labels_file)
        if not labels_loader:
            logging.error(f'cannot create qc image from label file {labels_file}')
            return

        # Rescale the intensities and cast to 8 bit so the image can be blended with the labels
        rescaled = sitk.RescaleIntensity(reg_loader.img)
        grey = sitk.GetArrayFromImage(sitk.Cast(rescaled, sitk.sitkUInt8))
        grey_mid = np.flipud(grey[:, :, grey.shape[2] // 2])

        labels = labels_loader.array
        labels_mid = np.flipud(labels[:, :, labels.shape[2] // 2])

        stem = splitext(basename(labels_loader.img_path))[0]
        blend_8bit(grey_mid, labels_mid, join(out_dir_labels, stem + '.png'))
def _read(self, paths: Iterable) -> List[np.ndarray]: """ - Read in the voxel-based data into 3D arrays - Apply guassian blur to the 3D image - mask - Unravel Parameters ---------- paths Path to load Returns ------- List of numpy arrays of blurred, masked, and raveled data """ images = [] for data_path in paths: logging.info(f'loading data: {data_path.name}') loader = common.LoadImage(data_path) if not self.shape: self.shape = loader.array.shape blurred_array = blur(loader.array, self.blur_fwhm, self.voxel_size) masked = blurred_array[self.mask != False] if self.memmap: t = tempfile.TemporaryFile() m = np.memmap(t, dtype=masked.dtype, mode='w+', shape=masked.shape) m[:] = masked masked = m images.append(masked) return images
def check_images(self):
    """
    Validate that the configured inputs can be loaded as volumes.

    ``self.options['inputs']`` may be a directory of images or a file listing image paths. Anything
    else is logged as an error and the process exits with status 1.

    Every image is loaded and passed to ``self.check_dtype`` and
    ``self.check_16bit_elastix_parameters_set``. A warning listing each image's dtype is logged when
    the inputs do not all share one dtype.

    Raises
    ------
    FileNotFoundError
        If an image cannot be loaded
    """
    inputs = self.options['inputs']

    if os.path.isdir(inputs):
        # Inputs is a folder
        names = os.listdir(inputs)
    elif os.path.isfile(inputs):
        # Inputs is a list of paths
        names = common.get_inputs_from_file_list(inputs, self.config_dir)
    else:
        logging.error("'inputs:' should refer to a directory of images or a file containing image paths")
        sys.exit(1)

    logging.info('validating input volumes')

    found_dtypes = {}

    for name in names:
        full_path = join(inputs, name)

        loaded = common.LoadImage(full_path)
        if not loaded:
            logging.error(loaded.error_msg)
            raise FileNotFoundError(f'cannot load {full_path}')

        self.check_dtype(self.config, loaded.array, loaded.img_path)
        self.check_16bit_elastix_parameters_set(self.config, loaded.array)
        found_dtypes[name] = loaded.array.dtype

    if len(set(found_dtypes.values())) > 1:
        # One "<name>:<tab><dtype>" line per image
        report = "".join(name + ':\t' + str(dtype) + '\n' for name, dtype in found_dtypes.items())
        logging.warning('The input images have a mixture of data types\n{}'.format(report))
def whole_volume_staging(propagated_mask_dir: Path, outdir: Path):
    """
    Generate a csv of whole embryo volumes.

    Each sub folder of ``propagated_mask_dir`` is expected to hold a mask whose file name starts with
    the folder name. The volume recorded for that folder is the number of voxels equal to 1 in the
    mask. Entries that are not directories are ignored.

    Parameters
    ----------
    propagated_mask_dir:
        masks that have been inverted back to rigid or original inputs
    outdir:
        where to put the resulting staging csv (handed to ``_write_output``)
    """
    volumes = {}

    specimen_dirs = (entry for entry in propagated_mask_dir.iterdir() if entry.is_dir())

    for specimen_dir in specimen_dirs:
        specimen_id = specimen_dir.name

        # The mask file starts with the same name as its folder, followed by an image extension
        mask_file = common.getfile_startswith(specimen_dir, specimen_id)
        mask = common.LoadImage(mask_file).array

        volumes[specimen_id] = mask[mask == 1].size

    _write_output(volumes, outdir)
def run(config_path: Path,
        wt_dir: Path,
        mut_dir: Path,
        out_dir: Path,
        target_dir: Path,
        treatment_dir: Path = None,
        interaction_dir: Path = None,
        lines_to_process: Union[List, None] = None
        ):
    """
    The entry point to the stats pipeline.
    Read in the stats_config, and iterate over the stats analysis methods and the mutant lines

    Parameters
    ----------
    config_path
        The lama stats_config (in TOML format). 'baseline_ids' and 'mutant_ids' entries, when present,
        are resolved relative to this file's folder
    wt_dir
        Root of the wild type data. Must contain an 'output' folder with registration results
    mut_dir
        Root of the mutant data. Must contain an 'output' folder with registration results
    out_dir
        The root output directory. Will be made if not existing (its parent has to exist)
    target_dir
        Contains the population average, masks, label_maps and label info files.
        All volumes should have been padded to the same size before registration.
    treatment_dir
        Optional. Passed straight through to the data loader
    interaction_dir
        Optional. Passed straight through to the data loader
    lines_to_process
        list: optional mutant line ids to process only.
        None: process all lines

    Raises
    ------
    FileNotFoundError
        If ``wt_dir/output`` or ``mut_dir/output`` is missing, or the output folder cannot be created
    """
    if not (wt_dir / 'output').is_dir():
        raise FileNotFoundError(f'{wt_dir / "output"} folder with registration results is not present')
    if not (mut_dir / 'output').is_dir():
        raise FileNotFoundError(f'{mut_dir / "output"} folder with registration results is not present')

    try:
        out_dir.mkdir(exist_ok=True)
    except FileNotFoundError:
        raise FileNotFoundError('Cannot create output folder')

    master_log_file = out_dir / f'{common.date_dhm()}_stats.log'
    logzero.logfile(str(master_log_file))
    logging.info(common.git_log())
    logging.info('### Started stats analysis ###')

    stats_config = cfg_load(config_path)

    mask = load_mask(target_dir, stats_config['mask'])

    # NOTE(review): if 'label_info' / 'label_map' are missing from the config, .get() returns None and
    # the path join below raises TypeError - confirm whether these keys are mandatory
    label_info_file = target_dir / stats_config.get('label_info')  # What if not exists
    label_map_file = target_dir / stats_config.get('label_map')
    label_map = common.LoadImage(label_map_file).array

    memmap = stats_config.get('memmap')
    if memmap:
        logging.info('Memory mapping input data')

    baseline_file = stats_config.get('baseline_ids')
    if baseline_file:
        baseline_file = config_path.parent / baseline_file

    mutant_file = stats_config.get('mutant_ids')
    if mutant_file:
        mutant_file = config_path.parent / mutant_file

    # Same default as used for the Stats and ResultsWriter arguments below, so a config without a
    # 'two_way' entry no longer raises KeyError
    two_way = stats_config.get('two_way', False)

    # Run each data class through the pipeline.
    for stats_type in stats_config['stats_types']:

        # Per-line processing redirects logging to a line log file; switch back to the master log
        logzero.logfile(str(master_log_file))
        logging.info(f"---Doing {stats_type} analysis---")

        gc.collect()

        # load the required stats object and data loader
        loader_class = DataLoader.factory(stats_type)

        loader = loader_class(wt_dir, mut_dir, mask, stats_config, label_info_file,
                              lines_to_process=lines_to_process,
                              baseline_file=baseline_file, mutant_file=mutant_file, memmap=memmap,
                              treatment_dir=treatment_dir, interaction_dir=interaction_dir)

        # Only affects organ vol loader.
        if not stats_config.get('normalise_organ_vol_to_mask'):
            loader.norm_to_mask_volume_on = False

        if loader_class == JacobianDataLoader:
            if stats_config.get('use_log_jacobians') is False:
                loader.data_folder_name = 'jacobians'

        # Currently only the intensity stats get normalised
        loader.normaliser = Normaliser.factory(stats_config.get('normalise'), stats_type)  # move this into subclass

        logging.info("Start iterate through lines")
        common.logMemoryUsageInfo()

        # Use a different iterator if doing a two-way analysis
        line_iterator = loader.two_way_iterator() if two_way else loader.line_iterator()

        # A plain for loop ends only when the iterator itself is exhausted. The previous
        # while/try/next form also swallowed any StopIteration raised while processing a line.
        line_input_data = None
        for line_input_data in line_iterator:
            logging.info(f"Data for line {line_input_data.line} loaded")
            common.logMemoryUsageInfo()

            line_id = line_input_data.line

            line_stats_out_dir = out_dir / line_id / stats_type
            line_stats_out_dir.mkdir(parents=True, exist_ok=True)
            line_log_file = line_stats_out_dir / f'{common.date_dhm()}_stats.log'
            logzero.logfile(str(line_log_file))

            logging.info(f"Processing line: {line_id}")

            stats_class = Stats.factory(stats_type)
            stats_obj = stats_class(line_input_data, stats_type, stats_config.get('use_staging', True),
                                    stats_config.get('two_way', False))

            stats_obj.stats_runner = linear_model.lm_r
            stats_obj.run_stats()

            logging.info('Statistical analysis finished.')
            common.logMemoryUsageInfo()

            logging.info('Writing results...')

            rw = ResultsWriter.factory(stats_type)
            writer = rw(stats_obj, mask, line_stats_out_dir, stats_type, label_map, label_info_file,
                        stats_config.get('two_way', False))

            logging.info('Finished writing results.')
            common.logMemoryUsageInfo()
            #
            # if stats_type == 'organ_volumes':
            #     c_data = {spec: data['t'] for spec, data in stats_obj.specimen_results.items()}
            #     c_df = pd.DataFrame.from_dict(c_data)
            #     # cluster_plots.tsne_on_raw_data(c_df, line_stats_out_dir)

            if stats_config.get('invert_stats'):
                if writer.line_heatmap:  # Organ vols will not have this
                    # How do I now sensibly get the path to the invert.yaml
                    # get the invert_configs for each specimen in the line
                    logging.info('Writing heatmaps...')
                    logging.info('Propogating the heatmaps back onto the input images ')
                    line_heatmap = writer.line_heatmap
                    line_reg_dir = mut_dir / 'output' / line_id
                    invert_heatmaps(line_heatmap, line_stats_out_dir, line_reg_dir, line_input_data)
                    logging.info('Finished writing heatmaps.')

            logging.info(f"Finished processing line: {line_id} - All done")
            common.logMemoryUsageInfo()

        # As before, clean up once the iterator is exhausted, using the last line's data object.
        # Nothing to clean up if the iterator yielded no lines.
        if line_input_data is not None:
            logging.info(f"Finish iterate through lines")
            line_input_data.cleanup()
            common.logMemoryUsageInfo()
def pad_volumes(indirs: Iterable[Path], max_dims: Tuple, outdir: Path, clobber: bool, filetype: str = 'nrrd'):
    """
    Zero-pad volumes, masks, labels to a common size.

    Output files keep the name of the original. They either overwrite the input (``clobber``) or are
    written to ``outdir/<input dir name>/``.

    Parameters
    ----------
    indirs
        one or more directories containing volumes to pad (volumes are found with
        ``common.get_file_paths``)
    max_dims
        dimensions to pad to. If falsy, ``get_largest_dimensions(indirs)`` is used.
        NOTE(review): the values are compared element-wise with ``vol.GetSize()``; the previous
        documentation said (z, y, x) - confirm the expected axis order
    outdir
        path to output dir. Exactly one of ``outdir`` / ``clobber`` has to be given, otherwise a
        message is printed and nothing is done
    clobber
        overwrite the input volumes in place
    filetype
        currently unused

    Raises
    ------
    common.LamaDataException
        If a volume is larger than ``max_dims`` along any axis

    Notes
    -----
    The padded volumes have their origin reset to (0, 0, 0) and spacing to (1, 1, 1).
    The process exits via ``sys.exit()`` if a volume cannot be loaded.
    """
    # Exactly one of clobber / outdir is required
    if bool(clobber) == bool(outdir):
        print('Specifiy either --clobber or an output dir (-o)')
        return

    if not max_dims:
        max_dims = get_largest_dimensions(indirs)

    print(f'Zero padding to {max_dims}')

    for indir in map(Path, indirs):

        dest_dir = indir if clobber else outdir / indir.name
        dest_dir.mkdir(exist_ok=True, parents=True)

        for vol_path in common.get_file_paths(indir):

            dest_path = vol_path if clobber else dest_dir / vol_path.name

            loader = common.LoadImage(vol_path)
            vol = loader.img
            if not vol:
                logging.error('error loading image for padding: {}'.format(loader.error_msg))
                sys.exit()

            vol_dims = vol.GetSize()

            # Number of voxels missing along each axis, split in two. The first pad argument gets the
            # half rounded down, the second gets that half plus any odd remainder.
            splits = [divmod(wanted - current, 2) for wanted, current in zip(max_dims, vol_dims)]
            first_pad = [half for half, _ in splits]
            second_pad = [half + odd for half, odd in splits]

            # A negative pad means the volume is bigger than the target size. We need all volumes to be
            # the same size, so stop.
            if any(a < 0 or b < 0 for a, b in zip(second_pad, first_pad)):
                msg = (
                    "\ncan't pad images\n"
                    "{} is larger than the specified volume size\n"
                    "Current vol size:{},\n"
                    "Max vol size: {}"
                    "\nCheck the 'pad_dims' in the config file\n".format(
                        basename(vol_path), str(vol_dims), str(max_dims)))
                logging.error(msg)
                raise common.LamaDataException(msg)

            # Pad the volume. New pixels set to zero
            padded = sitk.ConstantPad(vol, first_pad, second_pad, 0)
            padded.SetOrigin((0, 0, 0))
            padded.SetSpacing((1, 1, 1))

            sitk.WriteImage(padded, str(dest_path), True)

    print('Finished padding')
def make_qc_images(lama_specimen_dir: Path, target: Path, outdir: Path, mask: Path,
                   reverse_reg_propagation: bool = False):
    """
    Generate mid-slice images for quick QC of the registration process.

    Parameters
    ----------
    lama_specimen_dir
        The registration outdir. Should contain an 'output' folder
    target
        The target image to display in cyan
    outdir
        Where to put the qc images. The per-type sub folders are created inside it
        (without ``parents=True``, so ``outdir`` itself has to exist already)
    mask
        Used to identify the embryo in the image so we can display useful info
    reverse_reg_propagation
        Whether the labels were propagated by reverse registration (True) or by inverting the
        transforms (False). Both settings currently overlay ``paths.inverted_labels_dir`` onto the
        first registration stage, so the flag does not change the output

    Notes
    -----
    Make qc images from:
        The final registration stage.
            What the volumes look like after registration
        The rigidly-registered images with the inverted labels overlaid
            This is a good indicator of registration accuracy

    If setting up the specimen paths raises FileNotFoundError, the exception is logged and the function
    returns without making any images. A FileNotFoundError while drawing is logged as an error.
    """
    target = common.LoadImage(target).array

    # Make qc images for all stages of registration including any resolution images
    try:
        paths = LamaSpecimenData(lama_specimen_dir).setup()
    except FileNotFoundError as e:
        logging.exception(f'cannot find specimen directory\n{e}')
        # Nothing can be drawn without the specimen paths. Carrying on from here used to fail
        # with an UnboundLocalError on 'paths', hiding the real problem.
        return

    # Order output dirs by qc type
    red_cyan_dir = outdir / 'red_cyan_overlays'
    greyscale_dir = outdir / 'greyscales'
    red_cyan_dir.mkdir(exist_ok=True)
    greyscale_dir.mkdir(exist_ok=True)

    try:
        for i, (stage, img_path) in enumerate(paths.registration_imgs()):
            img = common.LoadImage(img_path).array
            _make_red_cyan_qc_images(target, img, red_cyan_dir, greyscale_dir, img_path.stem, i, stage)

        if paths.inverted_labels_dir:

            # First reg img will be the rigid-registered image
            first_reg_dir = paths.reg_dirs[0]

            # reverse_reg_propagation=True: the labels were transformed with the reverse registration
            #   transform (the final deformable stage), whose target was the rigid input.
            # reverse_reg_propagation=False: the labels were propagated with the inverse transform method,
            #   so we overlay the labels transformed up to the inverted affine stage onto the rigid input.
            #   (Could do inverted rigid labels overlaid on the original input, but using the rigid image
            #   allows us to compare specimens more easily.)
            # NOTE(review): both cases read the same directory, so the two branches were merged - confirm
            # that no separate location was intended for one of them.
            inverted_label_dir = paths.inverted_labels_dir

            inverted_label_overlays_dir = outdir / 'inverted_label_overlay'
            inverted_label_overlays_dir.mkdir(exist_ok=True)

            _overlay_labels(first_reg_dir,
                            inverted_label_dir,
                            inverted_label_overlays_dir,
                            mask=mask)
    except FileNotFoundError:
        # 220221 bodge. lama_reg creates a different file structure than job_runner. Need to harmonise
        logging.error(
            'No QC images made. This maybe because you used lama_reg rather than lama_job_runner'
        )
def _qc_slice_indices(axis_len: int, axis: int, bbox=None) -> np.ndarray:
    """Return the four integer slice positions to sample along ``axis`` of a 3D volume."""
    if bbox is None:
        # No mask: 8 evenly spaced positions over the whole axis, keep the 4 in the middle
        return np.linspace(0, axis_len, 8, dtype=int)[2:-2]
    # 3D bbox layout is (min_0, min_1, min_2, max_0, max_1, max_2): take 6 evenly spaced positions
    # across the box on this axis and keep the 4 inner ones
    return np.linspace(bbox[axis], bbox[axis + 3], 6, dtype=int)[1:-1]


def _overlay_labels(first_stage_reg_dir: Path,
                    inverted_labeldir: Path,
                    out_dir_labels: Path,
                    mask: Path = None):
    """
    Overlay each first-stage (rigid) registered image with its corresponding inverted labels.

    For every volume found in ``first_stage_reg_dir`` the label map is looked up at
    ``inverted_labeldir/<volume stem>/<volume file name>``, so the registered volumes and the inverted
    label maps have to be named identically. Four slices per view are blended and written to
    ``out_dir_labels/{sagittal,axial,coronal}/<label stem>_<slice index>.png``.

    Parameters
    ----------
    first_stage_reg_dir
        Folder with the registered volumes (resolution and pyramid sub folders are ignored)
    inverted_labeldir
        Folder holding one sub folder of inverted labels per volume
    out_dir_labels
        Existing folder to write the overlays into; the per-view sub folders are created
    mask
        Optional mask image. When given, slices are sampled inside the bounding box of its largest
        region rather than across the whole volume

    Notes
    -----
    If a volume or its label map fails to load, an error is logged and the function returns
    immediately, leaving any remaining volumes unprocessed. A missing label file is logged and skipped.
    """
    bbox = None
    if mask:
        mask_arr = sitk.GetArrayFromImage(sitk.ReadImage(str(mask)))
        regions = regionprops(mask_arr)
        if regions:
            # Get the largest label. Likely only one from the mask
            bbox = sorted(regions, key=lambda region: region.area)[-1]['bbox']
        else:
            # An empty mask gives nothing to centre on; sample across the whole volume instead
            logging.warning(f'no labelled region found in mask {mask}. Using slices from the whole volume')

    # (view name, array axis to slice along, flip vertically before blending)
    views = (('sagittal', 2, True), ('axial', 0, False), ('coronal', 1, True))

    for vol_path in common.get_file_paths(first_stage_reg_dir,
                                          ignore_folders=[RESOLUTION_IMGS_DIR, IMG_PYRAMID_DIR]):

        vol_reader = common.LoadImage(vol_path)
        if not vol_reader:
            logging.error(f'cannnot create qc image from {vol_path}')
            return

        label_path = inverted_labeldir / vol_path.stem / vol_path.name
        if not label_path.is_file():
            logging.info('No inverted label found. Skipping creation of inverted label-image overlay')
            continue

        label_reader = common.LoadImage(label_path)
        if not label_reader:
            logging.error(f'cannot create qc image from label file {label_path}')
            return

        # Rescale the intensities and cast to 8 bit so the image can be blended with the labels
        cast_img = sitk.Cast(sitk.RescaleIntensity(vol_reader.img), sitk.sitkUInt8)
        arr = sitk.GetArrayFromImage(cast_img)
        l_arr = label_reader.array
        base = splitext(basename(label_reader.img_path))[0]

        for view_name, axis, flip in views:
            view_dir = out_dir_labels / view_name
            view_dir.mkdir(exist_ok=True)

            for idx in _qc_slice_indices(arr.shape[axis], axis, bbox):
                # Index one position on ``axis`` and take everything on the other two
                selector = [slice(None)] * arr.ndim
                selector[axis] = idx
                selector = tuple(selector)

                vol_slice = arr[selector]
                label_slice = l_arr[selector]
                if flip:
                    vol_slice = np.flipud(vol_slice)
                    label_slice = np.flipud(label_slice)

                _blend_8bit(vol_slice, label_slice, view_dir / f'{base}_{idx}.png')