Example #1
0
def make_qc_images(lama_specimen_dir: Path, target: Path, outdir: Path):
    """
    Generate mid-slice images for quick qc of registration process.

    Parameters
    ----------
    lama_specimen_dir
        The registration outdir. Should contain an 'output' folder
    target
        The target image to display in cyan
    outdir
        Where to put the qc images. Sub-folders are created as needed

    Notes
    -----
    Make qc images from:
        The final registration stage.
            What the volumes look like after registration
        The rigidly-registered images with the inverted labels overlaid
            This is a good indicator of registration accuracy

    If the specimen directory cannot be set up, the error is logged and the function returns
    without making any qc images.
    """
    target = common.LoadImage(target).array
    # Make qc images for all stages of registration including any resolution images
    try:
        paths = SpecimenDataPaths(lama_specimen_dir).setup()
    except FileNotFoundError as e:
        logging.exception(f'cannot find specimen directory\n{e}')
        # Nothing can be drawn without the specimen paths; carrying on would only fail later
        # with a NameError on the unbound 'paths'
        return

    # Order output dirs by qc type
    red_cyan_dir = outdir / 'red_cyan_overlays'
    greyscale_dir = outdir / 'greyscales'
    red_cyan_dir.mkdir(parents=True, exist_ok=True)
    greyscale_dir.mkdir(parents=True, exist_ok=True)

    for i, (stage, img_path) in enumerate(paths.registration_imgs()):
        img = common.LoadImage(img_path).array
        make_red_cyan_qc_images(target, img, red_cyan_dir, greyscale_dir, img_path.stem, i, stage)

    if paths.inverted_labels_dirs:
        # TODO: First reg img will be either the rigid-registered image if there are no resolution
        # intermediate images, which is really what we want. Otherwise it will be the first resolution
        # image, which will do for now, as they are usually very similar
        first_reg_dir = paths.reg_dirs[0]
        # If we had rigid, affine, deformable stages, we would need to overlay the rigid image ([0]) with
        # the label that finally had the inverted affine transform applied to it ([1]).
        # NOTE(review): this assumes at least two inverted-label dirs exist; a single entry raises IndexError
        inverted_label_dir = paths.inverted_labels_dirs[1]
        inverted_label_overlays_dir = outdir / 'inverted_label_overlay'
        inverted_label_overlays_dir.mkdir(parents=True, exist_ok=True)

        overlay_labels(first_reg_dir,
                       inverted_label_dir,
                       inverted_label_overlays_dir)
Example #2
0
def load_mask(parent_dir: Path, mask_path: Path) -> np.ndarray:
    """
    Load the mask image once so that it can be shared between the data getters.

    Parameters
    ----------
    parent_dir
        Directory that ``mask_path`` is resolved against
    mask_path
        Path of the mask image, joined onto ``parent_dir``

    Raises
    ------
    ValueError
        If the set of unique values in the mask is not exactly {0, 1}.
        Note that this also rejects a mask that is all zeros or all ones.

    Returns
    -------
    The mask array as loaded by ``common.LoadImage``
    """
    mask_file = parent_dir / mask_path
    mask = common.LoadImage(mask_file).array

    values_present = set(np.unique(mask))
    if values_present == {0, 1}:
        return mask

    msg = "Mask image should contain only ones and zeros "
    logging.error(msg)
    raise ValueError(msg)
Example #3
0
    def memorymap_data(self, lama_root_dir: Path) -> Dict[str, np.memmap]:
        """
        Iterate over the specimens yielded by ``specimen_iterator(lama_root_dir)`` and copy one
        image per specimen into a numpy memmap backed by a temporary file.

        NOTE(review): this method looks unfinished and cannot run as written:
        ``imgpath`` is never assigned, ``reg_dir``, ``line_dir`` and ``spec_dir`` are never used,
        and ``common.getfile_endswith`` is given only a suffix and no directory to search.
        Presumably ``imgpath`` was meant to be derived from ``reg_dir`` - confirm before use.

        Parameters
        ----------
        lama_root_dir
            Root directory handed to ``specimen_iterator``

        Returns
        -------
        OrderedDict mapping image basename to the memmap holding that image's data

        """

        imgs = OrderedDict()

        for line_dir, spec_dir in specimen_iterator(lama_root_dir):
            # NOTE(review): spec_dir is not passed here - confirm the signature of getfile_endswith
            config_file = common.getfile_endswith(
                '.toml')  # Get the Lama config from the specimen directory
            config = LamaConfig(config_file)
            reg_dir = config['root_reg_dir']
            # NOTE(review): imgpath is undefined at this point, so this line raises NameError
            basename = os.path.basename(imgpath)
            loader = common.LoadImage(imgpath)

            if not loader:
                logging.error("Problem normalising image: {}".format(
                    loader.error_msg))
                # sys.exit() without an argument exits with status 0
                sys.exit()
            arr = loader.array
            # The temporary file backs the memmap; it is never explicitly closed here
            t = tempfile.TemporaryFile()
            m = np.memmap(t, dtype=arr.dtype, mode='w+', shape=arr.shape)
            m[:] = arr
            # Images sharing a basename overwrite each other in the result
            imgs[basename] = m
        return imgs
Example #4
0
def overlay_labels(first_stage_reg_dir: Path,
                   inverted_labeldir: Path,
                   out_dir_labels: Path):
    """
    Write a mid-slice PNG of each first-stage registered volume blended with its inverted label map.

    The label map for a volume is looked up at ``inverted_labeldir / <volume stem> / <volume file name>``,
    so registered volumes and inverted label maps must be named identically. A volume without a label
    map is skipped with an info message. A volume or label map that fails to load is logged as an
    error and ends the whole run.

    TODO: Add axial and coronal views.
    """
    reg_vol_paths = common.get_file_paths(first_stage_reg_dir, ignore_folder=IGNORE_FOLDER)

    for reg_vol_path in reg_vol_paths:
        reg_loader = common.LoadImage(reg_vol_path)
        if not reg_loader:
            logging.error(f'cannnot create qc image from {reg_vol_path}')
            return

        expected_label = inverted_labeldir / reg_vol_path.stem / reg_vol_path.name
        if not expected_label.is_file():
            logging.info('No inverted label found. Skipping creation of inverted label-image overlay')
            continue

        label_loader = common.LoadImage(expected_label)
        if not label_loader:
            logging.error(f'cannot create qc image from label file {expected_label}')
            return

        # Rescale the intensities and cast to 8-bit before blending
        rescaled = sitk.RescaleIntensity(reg_loader.img)
        vol_8bit = sitk.GetArrayFromImage(sitk.Cast(rescaled, sitk.sitkUInt8))
        # Middle slice along the last array axis, flipped vertically
        vol_slice = np.flipud(vol_8bit[:, :, vol_8bit.shape[2] // 2])

        label_arr = label_loader.array
        label_slice = np.flipud(label_arr[:, :, label_arr.shape[2] // 2])

        # Name the PNG after the label file, minus its extension
        stem, _ext = splitext(basename(label_loader.img_path))
        png_path = join(out_dir_labels, stem + '.png')
        blend_8bit(vol_slice, label_slice, png_path)
Example #5
0
    def _read(self, paths: Iterable) -> List[np.ndarray]:
        """
        - Read in the voxel-based data into 3D arrays
        - Apply guassian blur to the 3D image
        - mask
        - Unravel


        Parameters
        ----------
        paths
            Path to load

        Returns
        -------
        List of numpy arrays of blurred, masked, and raveled data
        """

        images = []

        for data_path in paths:
            logging.info(f'loading data: {data_path.name}')
            loader = common.LoadImage(data_path)

            if not self.shape:
                self.shape = loader.array.shape

            blurred_array = blur(loader.array, self.blur_fwhm, self.voxel_size)
            masked = blurred_array[self.mask != False]

            if self.memmap:
                t = tempfile.TemporaryFile()
                m = np.memmap(t,
                              dtype=masked.dtype,
                              mode='w+',
                              shape=masked.shape)
                m[:] = masked
                masked = m

            images.append(masked)

        return images
Example #6
0
    def check_images(self):
        """
        Validate that the configured inputs exist and that every volume can be loaded.

        ``self.options['inputs']`` may be a directory of images or a file listing image paths.
        Anything else is logged as an error and the process exits with status 1.
        Each volume is loaded and passed to ``check_dtype`` and ``check_16bit_elastix_parameters_set``.
        A warning is logged if the volumes do not all share one data type.

        Raises
        ------
        FileNotFoundError
            If a volume cannot be loaded
        """
        inputs = self.options['inputs']

        if os.path.isdir(inputs):
            # Inputs is a folder: every entry in it is treated as an image
            img_names = os.listdir(inputs)
        elif os.path.isfile(inputs):
            # Inputs is a file containing a list of paths
            img_names = common.get_inputs_from_file_list(inputs, self.config_dir)
        else:
            logging.error("'inputs:' should refer to a directory of images or a file containing image paths")
            sys.exit(1)

        logging.info('validating input volumes')

        dtype_by_name = {}

        for name in img_names:
            vol_path = join(inputs, name)
            loaded = common.LoadImage(vol_path)

            if not loaded:
                logging.error(loaded.error_msg)
                raise FileNotFoundError(f'cannot load {vol_path}')

            self.check_dtype(self.config, loaded.array, loaded.img_path)
            self.check_16bit_elastix_parameters_set(self.config, loaded.array)
            dtype_by_name[name] = loaded.array.dtype

        if len(set(dtype_by_name.values())) <= 1:
            return

        # One "name:<tab>dtype" line per image
        listing = ''.join(k + ':\t' + str(v) + '\n' for k, v in dtype_by_name.items())
        logging.warning('The input images have a mixture of data types\n{}'.format(listing))
Example #7
0
def whole_volume_staging(propagated_mask_dir: Path, outdir: Path):
    """
    Count the mask voxels of every specimen and hand the counts to ``_write_output``.

    Parameters
    ----------
    propagated_mask_dir
        Folder of masks that have been inverted back to rigid or original inputs.
        Each sub-folder is expected to hold a mask whose file name starts with the sub-folder name
    outdir
        Where to put the resulting staging output

    """
    voxel_counts = {}

    # Only sub-folders hold specimen masks; plain files are ignored
    specimen_dirs = (entry for entry in propagated_mask_dir.iterdir() if entry.is_dir())

    for specimen_dir in specimen_dirs:
        specimen_id = specimen_dir.name
        # The mask starts with the same name as the folder + an image extension
        mask_file = common.getfile_startswith(specimen_dir, specimen_id)
        mask = common.LoadImage(mask_file).array
        # Whole-embryo volume = number of voxels labelled 1
        voxel_counts[specimen_id] = mask[mask == 1].size

    _write_output(voxel_counts, outdir)
Example #8
0
def run(config_path: Path,
        wt_dir: Path,
        mut_dir: Path,
        out_dir: Path,
        target_dir: Path,
        treatment_dir: Path = None,
        interaction_dir: Path = None,
        lines_to_process: Union[List, None] = None
        ):
    """
    The entry point to the stats pipeline.
    Read in the stats_config, and iterate over the stats analysis methods and the mutant lines

    Parameters
    ----------
    config_path
        The lama stats_config (in TOML format)

    wt_dir
        Root of the wild type data. Must contain an 'output' folder with the registration results

    mut_dir
        Root of the mutant data. Must contain an 'output' folder with the registration results

    out_dir
        The root output directory. Will be made if not existing (its parent must exist)

    target_dir
        Contains the population average, masks, label_maps and label infor files
        All Volumes should have been padded to the same size before registration.

    treatment_dir
        Passed through to the data loader.
        NOTE(review): presumably the root of the treatment data for a two-way analysis - confirm

    interaction_dir
        Passed through to the data loader.
        NOTE(review): presumably the root of the interaction data for a two-way analysis - confirm

    lines_to_process
        list: optional mutant line ids to process only.
        None: process all lines

    Raises
    ------
    FileNotFoundError
        If either registration 'output' folder is missing or the output folder cannot be created
    """

    for root_dir in (wt_dir, mut_dir):
        reg_out_dir = root_dir / 'output'
        if not reg_out_dir.is_dir():
            raise FileNotFoundError(f'{reg_out_dir} folder with registration results is not present')
    try:
        out_dir.mkdir(exist_ok=True)
    except FileNotFoundError as e:
        raise FileNotFoundError('Cannot create output folder') from e

    master_log_file = out_dir / f'{common.date_dhm()}_stats.log'
    logzero.logfile(str(master_log_file))
    logging.info(common.git_log())
    logging.info('### Started stats analysis ###')

    stats_config = cfg_load(config_path)

    mask = load_mask(target_dir, stats_config['mask'])
    # NOTE: 'label_info' and 'label_map' are effectively required; a missing key gives None and
    # the path join below raises TypeError
    label_info_file = target_dir / stats_config.get('label_info')
    label_map_file = target_dir / stats_config.get('label_map')
    label_map = common.LoadImage(label_map_file).array

    memmap = stats_config.get('memmap')
    if memmap:
        logging.info('Memory mapping input data')

    # Optional id files are given relative to the config file
    baseline_file = stats_config.get('baseline_ids')
    if baseline_file:
        baseline_file = config_path.parent / baseline_file

    mutant_file = stats_config.get('mutant_ids')
    if mutant_file:
        mutant_file = config_path.parent / mutant_file

    # Read once and with a default, so that a config without 'two_way' means a one-way analysis
    two_way = stats_config.get('two_way', False)

    # Run each data class through the pipeline.
    for stats_type in stats_config['stats_types']:

        # Per-line processing redirects the log to a line-specific file; switch back to the master log
        logzero.logfile(str(master_log_file))
        logging.info(f"---Doing {stats_type} analysis---")

        gc.collect()

        # load the required stats object and data loader
        loader_class = DataLoader.factory(stats_type)

        loader = loader_class(wt_dir, mut_dir, mask, stats_config, label_info_file,
                              lines_to_process=lines_to_process,
                              baseline_file=baseline_file, mutant_file=mutant_file, memmap=memmap,
                              treatment_dir=treatment_dir, interaction_dir=interaction_dir)

        # Only affects organ vol loader.
        if not stats_config.get('normalise_organ_vol_to_mask'):
            loader.norm_to_mask_volume_on = False

        if loader_class == JacobianDataLoader:
            # Only an explicit False switches to the raw jacobians; a missing key keeps the default
            if stats_config.get('use_log_jacobians') is False:
                loader.data_folder_name = 'jacobians'
        # Currently only the intensity stats get normalised
        loader.normaliser = Normaliser.factory(stats_config.get('normalise'), stats_type)  # move this into subclass

        logging.info("Start iterate through lines")
        common.logMemoryUsageInfo()

        # Use a different iterator if doing a two-way analysis
        line_iterator = loader.two_way_iterator() if two_way else loader.line_iterator()
        line_input_data = None

        for line_input_data in line_iterator:
            logging.info(f"Data for line {line_input_data.line} loaded")
            common.logMemoryUsageInfo()

            line_id = line_input_data.line

            line_stats_out_dir = out_dir / line_id / stats_type
            line_stats_out_dir.mkdir(parents=True, exist_ok=True)

            line_log_file = line_stats_out_dir / f'{common.date_dhm()}_stats.log'
            logzero.logfile(str(line_log_file))

            logging.info(f"Processing line: {line_id}")

            stats_class = Stats.factory(stats_type)
            stats_obj = stats_class(line_input_data, stats_type, stats_config.get('use_staging', True), two_way)

            stats_obj.stats_runner = linear_model.lm_r
            stats_obj.run_stats()

            logging.info('Statistical analysis finished.')
            common.logMemoryUsageInfo()

            logging.info('Writing results...')

            rw = ResultsWriter.factory(stats_type)
            writer = rw(stats_obj, mask, line_stats_out_dir, stats_type, label_map, label_info_file, two_way)

            logging.info('Finished writing results.')
            common.logMemoryUsageInfo()

            # Organ vols will not have a line heatmap
            if stats_config.get('invert_stats') and writer.line_heatmap:
                # How do I now sensibly get the path to the invert.yaml
                # get the invert_configs for each specimen in the line
                logging.info('Writing heatmaps...')
                logging.info('Propogating the heatmaps back onto the input images ')
                line_heatmap = writer.line_heatmap
                line_reg_dir = mut_dir / 'output' / line_id
                invert_heatmaps(line_heatmap, line_stats_out_dir, line_reg_dir, line_input_data)
                logging.info('Finished writing heatmaps.')

            logging.info(f"Finished processing line: {line_id} - All done")
            common.logMemoryUsageInfo()

        # Clean up after the last line that was loaded (nothing to do if the iterator was empty)
        if line_input_data is not None:
            logging.info("Finish iterate through lines")
            line_input_data.cleanup()
            common.logMemoryUsageInfo()
Example #9
0
def pad_volumes(indirs: Iterable[Path],
                max_dims: Tuple,
                outdir: Path,
                clobber: bool,
                filetype: str = 'nrrd'):
    """
    Pad volumes, masks, labels. Output files will have same name as original, but be in a new output folder

    Exactly one of ``clobber`` and ``outdir`` must be given; otherwise a message is printed and
    nothing is done.

    Parameters
    ----------
    indirs
        one or more directories containing volumes to pad (Will search subdirectories for volumes)
    max_dims
        dimensions to pad to. If empty/None, ``get_largest_dimensions(indirs)`` is used.
        NOTE(review): originally documented as (z, y, x), but the values are compared
        element-wise with SimpleITK's ``GetSize()`` - confirm the axis order
    outdir
        path to output dir. Each input dir gets a sub-folder of the same name
    clobber
        overwrite the input volumes in place instead of writing to ``outdir``
    filetype
        currently unused

    Raises
    ------
    common.LamaDataException
        If a volume is larger than ``max_dims`` along any dimension
    """

    # Exactly one of clobber / outdir must be set
    if bool(clobber) == bool(outdir):
        print('Specifiy either --clobber or an output dir (-o)')
        return

    if not max_dims:
        max_dims = get_largest_dimensions(indirs)

    print(f'Zero padding to {max_dims}')

    for dir_ in indirs:
        dir_ = Path(dir_)

        if clobber:
            result_dir = dir_
        else:
            result_dir = outdir / dir_.name
            result_dir.mkdir(exist_ok=True, parents=True)

        for path in common.get_file_paths(dir_):

            outpath = path if clobber else result_dir / path.name

            loader = common.LoadImage(path)
            vol = loader.img
            if not vol:
                logging.error('error loading image for padding: {}'.format(
                    loader.error_msg))
                # Exit with a failure status so that calling scripts can detect the error
                sys.exit(1)
            vol_dims = vol.GetSize()

            # The voxel differences between the vol dims and the max dims
            diffs = [m - v for m, v in zip(max_dims, vol_dims)]

            # Split each difference in two. When it is odd, the extra voxel goes to the second half
            pad_lower = [d // 2 for d in diffs]
            pad_upper = [d // 2 + d % 2 for d in diffs]

            # If any values are negative, stop. We need all volumes to be the same size
            if any(extent < 0 for extent in pad_lower + pad_upper):
                msg = (
                    "\ncan't pad images\n"
                    "{} is larger than the specified volume size\n"
                    "Current vol size:{},\n"
                    "Max vol size: {}"
                    "\nCheck the 'pad_dims' in the config file\n".format(
                        basename(path), str(vol_dims), str(max_dims)))

                logging.error(msg)
                raise common.LamaDataException(msg)

            # Pad the volume. New pixels set to zero.
            # ConstantPad takes the lower-bound padding first, then the upper-bound padding
            padded_vol = sitk.ConstantPad(vol, pad_lower, pad_upper, 0)
            # Origin and spacing are reset on the padded output
            padded_vol.SetOrigin((0, 0, 0))
            padded_vol.SetSpacing((1, 1, 1))

            # Third argument enables compression
            sitk.WriteImage(padded_vol, str(outpath), True)
    print('Finished padding')
Example #10
0
def make_qc_images(lama_specimen_dir: Path,
                   target: Path,
                   outdir: Path,
                   mask: Path,
                   reverse_reg_propagation: bool = False):
    """
    Generate mid-slice images for quick qc of registration process.

    Parameters
    ----------
    lama_specimen_dir
        The registration outdir. Should contain an 'output' folder
    target
        The target image to display in cyan
    outdir
        Where to put the qc images. Sub-folders are created as needed
    mask
        Used to identify the embryo in the image so we can display useful info
    reverse_reg_propagation
        Whether the labels were propagated with the reverse registration method (True) or the
        inverse transform method (False). Both currently read the labels from the same
        ``inverted_labels_dir``, so this flag does not change the output at present

    Notes
    -----
    Make qc images from:
        The final registration stage.
            What the volumes look like after registration
        The rigidly-registered images with the inverted labels overlaid
            This is a good indicator of registration accuracy

    If the specimen directory cannot be set up, the error is logged and the function returns
    without making any qc images.
    """
    target = common.LoadImage(target).array
    # Make qc images for all stages of registration including any resolution images
    try:
        paths = LamaSpecimenData(lama_specimen_dir).setup()
    except FileNotFoundError as e:
        logging.exception(f'cannot find specimen directory\n{e}')
        # Nothing can be drawn without the specimen paths; carrying on would only fail later
        # with a NameError on the unbound 'paths'
        return

    # Order output dirs by qc type
    red_cyan_dir = outdir / 'red_cyan_overlays'
    greyscale_dir = outdir / 'greyscales'
    red_cyan_dir.mkdir(parents=True, exist_ok=True)
    greyscale_dir.mkdir(parents=True, exist_ok=True)

    try:
        for i, (stage, img_path) in enumerate(paths.registration_imgs()):
            img = common.LoadImage(img_path).array
            _make_red_cyan_qc_images(target, img, red_cyan_dir, greyscale_dir,
                                     img_path.stem, i, stage)

        if paths.inverted_labels_dir:

            # First reg img will be the rigid-registered image
            first_reg_dir = paths.reg_dirs[0]

            # Reverse registration propagation: the labels were transformed using the reverse registration
            # transform (the final deformable stage), whose target will have been the rigid input.
            # Inverse transform propagation: the labels were transformed using the tforms up to the inverted
            # affine stage, and are overlaid on the rigid input (could do inverted rigid labels overlaid on
            # original input, but on rigid allows us to compare specimens more easily).
            # Either way the labels are read from the same folder.
            inverted_label_dir = paths.inverted_labels_dir

            inverted_label_overlays_dir = outdir / 'inverted_label_overlay'
            inverted_label_overlays_dir.mkdir(parents=True, exist_ok=True)

            _overlay_labels(first_reg_dir,
                            inverted_label_dir,
                            inverted_label_overlays_dir,
                            mask=mask)
    except FileNotFoundError:  # 220221 bodge. lama_reg creates a different file structure tha job_runner. Need to harmonise
        logging.error(
            'No QC images made. This maybe because you used lama_reg rather than lama_job_runner'
        )
Example #11
0
def _overlay_labels(first_stage_reg_dir: Path,
                    inverted_labeldir: Path,
                    out_dir_labels: Path,
                    mask: Path = None):
    """
    Overlay the first registered image (rigid) with the corresponding inverted labels
    It depends on the registered volumes and inverted label maps being named identically

    For each volume that has a label map, four slices are written per view into the
    'sagittal', 'axial' and 'coronal' sub-folders of ``out_dir_labels``.

    Parameters
    ----------
    first_stage_reg_dir
        Folder of first-stage registered volumes
    inverted_labeldir
        Folder of inverted labels, looked up as ``inverted_labeldir / <volume stem> / <volume file name>``
    out_dir_labels
        Where to write the overlay PNGs (must already exist)
    mask
        Optional mask image. When given, slices are taken from inside the bounding box of its
        largest labelled region; otherwise they are taken from around the middle of the volume

    Notes
    -----
    A volume or label map that fails to load is logged as an error and ends the whole run.
    """
    bbox = None
    if mask:
        mask_arr = sitk.GetArrayFromImage(sitk.ReadImage(str(mask)))
        regions = regionprops(mask_arr)
        if regions:
            # Get the largest label. Likely only one from the mask
            bbox = sorted(regions, key=lambda region: region.area)[-1]['bbox']
        else:
            logging.warning('No labelled region found in mask. Using slices from the middle of the volume')

    # (view name, array axis to slice along, flip the slice vertically?)
    views = (('sagittal', 2, True),
             ('axial', 0, False),
             ('coronal', 1, True))

    for vol_path in common.get_file_paths(
            first_stage_reg_dir,
            ignore_folders=[RESOLUTION_IMGS_DIR, IMG_PYRAMID_DIR]):

        vol_reader = common.LoadImage(vol_path)

        if not vol_reader:
            logging.error(f'cannnot create qc image from {vol_path}')
            return

        label_path = inverted_labeldir / vol_path.stem / vol_path.name

        if not label_path.is_file():
            logging.info(
                'No inverted label found. Skipping creation of inverted label-image overlay'
            )
            continue

        label_reader = common.LoadImage(label_path)

        if not label_reader:
            logging.error(
                f'cannot create qc image from label file {label_path}')
            return

        # Rescale the intensities and cast to 8-bit before blending
        cast_img = sitk.Cast(sitk.RescaleIntensity(vol_reader.img),
                             sitk.sitkUInt8)
        arr = sitk.GetArrayFromImage(cast_img)
        base = splitext(basename(label_reader.img_path))[0]
        l_arr = label_reader.array

        for view_name, axis, flip in views:
            view_dir = out_dir_labels / view_name
            view_dir.mkdir(exist_ok=True)

            # dtype=int: the np.int alias was removed from NumPy
            if bbox is None:
                # Get a few slices from the middle: the 4 inner positions of 8 spread over the axis
                indices = np.linspace(0, arr.shape[axis], 8, dtype=int)[2:-2]
            else:
                # bbox holds the three minimum bounds followed by the three maximum bounds.
                # Take the 4 inner slices
                indices = np.linspace(bbox[axis], bbox[axis + 3], 6, dtype=int)[1:-1]

            for idx in indices:
                selector = [slice(None)] * 3
                selector[axis] = idx
                selector = tuple(selector)

                vol_slice = arr[selector]
                label_slice = l_arr[selector]
                if flip:
                    vol_slice = np.flipud(vol_slice)
                    label_slice = np.flipud(label_slice)

                _blend_8bit(vol_slice, label_slice, view_dir / f'{base}_{idx}.png')