Example 1
    def __init__(self,
                 wt_dir: Path,
                 mut_dir: Path,
                 mask: np.ndarray,
                 config: Dict,
                 label_info_file: Path,
                 lines_to_process: Union[List, None] = None,
                 baseline_file: Union[str, None] = None,
                 mutant_file: Union[str, None] = None,
                 memmap: bool = False):
        """

        Parameters
        ----------
        wt_dir
        mut_dir
        mask
        config
        label_info_file
        lines_to_process
        baseline_file
            Path to csv containing baseline ids to use.
            If None, use all baselines
        mutant_file
            Path to file containing mutant ids to use.
            If None, use all mutants
        memmap
            If True, memory map the input data to reduce memory usage
        """
        self.norm_to_mask_volume_on = False

        self.label_info: pd.DataFrame = None

        self.baseline_ids = self.load_ids(baseline_file)

        if mutant_file:
            try:
                self.mutant_ids = common.cfg_load(mutant_file)
            except toml.decoder.TomlDecodeError as e:
                raise ValueError(
                    f'The mutant id file is not correctly formatted\n{e}')
        else:
            self.mutant_ids = None

        if label_info_file:
            self.label_info = pd.read_csv(label_info_file)

        self.wt_dir = wt_dir
        self.mut_dir = mut_dir
        self.config = config
        self.label_info_file = label_info_file
        self.lines_to_process = lines_to_process
        self.mask = mask  # 3D mask
        self.shape = None

        self.normaliser = None

        self.blur_fwhm = config.get('blur', DEFAULT_FWHM)
        self.voxel_size = config.get('voxel_size', DEFAULT_VOXEL_SIZE)
        self.memmap = memmap
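
The load_ids helper called above is not shown. A minimal sketch of what it could look like, assuming the csv holds one id per row in an 'id' column (the column name is a guess):

import pandas as pd
from pathlib import Path
from typing import List, Optional, Union

def load_ids(baseline_file: Union[str, Path, None]) -> Optional[List[str]]:
    # No file given: return None so that all baselines are used
    if baseline_file is None:
        return None
    # One id per row; the 'id' column name is an assumption
    return pd.read_csv(baseline_file)['id'].astype(str).tolist()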
Example 2
def generate_organ_volumes(config: LamaConfig):

    # Get the final inversion stage
    invert_config = config['inverted_transforms'] / INVERT_CONFIG

    first_stage = cfg_load(invert_config)['inversion_order'][-1]

    inverted_label_dir = config['inverted_labels'] / first_stage

    out_path = config['organ_vol_result_csv']

    # Generate the organ volume csv
    label_sizes(inverted_label_dir, out_path)
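
label_sizes is not shown in this snippet. A hedged sketch of what such a helper might do, counting the voxels of each label in every inverted label map and writing one row per specimen; the nrrd glob and column layout are assumptions, not the real API:

import numpy as np
import pandas as pd
import SimpleITK as sitk
from pathlib import Path

def label_sizes_sketch(label_dir: Path, out_path: Path):
    rows = {}
    for label_path in sorted(label_dir.glob('**/*.nrrd')):
        arr = sitk.GetArrayFromImage(sitk.ReadImage(str(label_path)))
        # Count voxels per label, ignoring background (0)
        labels, counts = np.unique(arr[arr != 0], return_counts=True)
        rows[label_path.stem] = dict(zip(labels, counts))
    pd.DataFrame.from_dict(rows, orient='index').to_csv(out_path)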
Example 3
    def __init__(self, config_path: Path, invertable, outdir, threads=None, noclobber=False):
        """
        Inverts a series of volumes. A yaml config file specifies the order of inverted transform parameters
        to use. This config file should be in the root of the directory containing these inverted tform dirs.

        Also takes a directory containing the volumes/label maps etc. to invert. These need to be in directories
        named the same as the corresponding inverted tform file directories.

        Parameters
        ----------
        config_path
            path to yaml config containing the order of the inverted directories to use. The directories
            containing the propagation transform files should be in the same directory
        invertable: str
            dir or path. If dir, invert all objects within the subdirectories.
            If path to a single object (e.g. a labelmap), invert that instead
        outdir
            where to store inverted volumes
        threads: str/None
            number of threads to use. If None, use all available threads
        noclobber: bool
            if True, do not overwrite already-inverted labels

        """

        self.noclobber = noclobber

        common.test_installation('transformix')

        self.config = cfg_load(config_path)

        self.invertables = invertable
        self.config_dir = config_path.parent  # The dir containing the inverted elx param files

        self.threads = threads
        self.out_dir = outdir
        common.mkdir_if_not_exists(self.out_dir)

        self.elx_param_prefix = ELX_PARAM_PREFIX
        self.PROPAGATION_TFORM_NAME = None  # Set in subclasses
        self.last_invert_dir = None  # I think this is used to find volumes for the organ volume calculation
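
For reference, a sketch of the kind of order config this class consumes, written with yaml as elsewhere in these examples. The stage names are hypothetical, and the key name varies between versions ('inversion_order' in the older snippets here, 'label_propagation_order' in the newer ones):

import yaml

order_cfg = {
    'inversion_order': ['deformable', 'affine', 'rigid'],  # hypothetical stages
    'registration_directory': '../registrations',
}
print(yaml.dump(order_cfg, default_flow_style=False))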
Example 4
    def _get_reg_order(self, spec_root):
        """
        Get the order of registrations from the text file in the registrations
        folder, and the label propagation order from the inverted_transforms
        config, if present
        """
        reg_order = []
        inv_order = []
        with open((spec_root / 'output' / 'registrations' / REG_DIR_ORDER_CFG), 'r') as fh:
            for line in fh:
                if line.strip():
                    reg_order.append(line.strip())
        try:
            inv_order_cfg = spec_root / 'output' / 'inverted_transforms' / PROPAGATE_CONFIG
            c = cfg_load(inv_order_cfg)
            for stage in c['label_propagation_order']:
                inv_order.append(stage)
        except FileNotFoundError:
            inv_order = None
        return reg_order, inv_order
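
A small usage sketch, assuming REG_DIR_ORDER_CFG resolves to a plain text file with one stage name per line; the file name, stage names and paths here are hypothetical:

from pathlib import Path

spec_root = Path('output/my_line/specimen_1')  # hypothetical specimen root
reg_dir = spec_root / 'output' / 'registrations'
reg_dir.mkdir(parents=True, exist_ok=True)
(reg_dir / 'reg_order.txt').write_text('rigid\naffine\ndeformable\n')
# _get_reg_order would then return (['rigid', 'affine', 'deformable'], None),
# since no inverted_transforms propagation config exists yet.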
Example 5
def test_config_errors():
    """
    Read in the current config that shuld work

    """

    config_file = registration_root / 'registration_config.toml'
    config = cfg_load(config_file)

    # Staging = embryo_volume needs at least one similarity/affine stage to work

    config['registration_stage_params'][:] = [x for x in config['registration_stage_params'] if
                                              x['elastix_parameters']['Transform'] not in ['SimilarityTransform', 'AffineTransform']]

    cfg = validate_config.LamaConfig(config, config_file)
Example 6
def run(config_path: Path,
        wt_dir: Path,
        mut_dir: Path,
        out_dir: Path,
        target_dir: Path,
        treatment_dir: Path = None,
        interaction_dir: Path = None,
        lines_to_process: Union[List, None] = None
        ):
    """
    The entry point to the stats pipeline.
    Read in the stats_config, and iterate over the stats analysis methods and the mutant lines

    Parameters
    ----------
    config_path
        The lama stats_config (in TOML format)

    wt_dir
        Root of the wild type data. Should contain mutant line subfolders

    mut_dir
        Root of the mutant data. Should contain mutant line subfolders

    out_dir
        The root output directory. Will be made if not existing

    target_dir
        Contains the population average, masks, label maps and label info files.
        All volumes should have been padded to the same size before registration.

    treatment_dir
        Root of the treatment group data. Only required for two-way analyses

    interaction_dir
        Root of the interaction group data. Only required for two-way analyses

    lines_to_process
        list: optional mutant line ids to process only.
        None: process all lines
    """
    
    if not (wt_dir / 'output').is_dir():
        raise FileNotFoundError(f'{wt_dir / "output"} folder with registration results is not present')
    if not (mut_dir / 'output').is_dir():
        raise FileNotFoundError(f'{mut_dir / "output"} folder with registration results is not present')
    try:
        out_dir.mkdir(exist_ok=True)
    except FileNotFoundError:
        raise FileNotFoundError('Cannot create output folder')

    master_log_file = out_dir / f'{common.date_dhm()}_stats.log'
    logzero.logfile(str(master_log_file))
    logging.info(common.git_log())
    logging.info('### Started stats analysis ###')

    stats_config = cfg_load(config_path)

    mask = load_mask(target_dir, stats_config['mask'])
    label_info_file = target_dir / stats_config.get('label_info')  # TODO: handle a missing label_info entry
    label_map_file = target_dir / stats_config.get('label_map')
    label_map = common.LoadImage(label_map_file).array

    memmap = stats_config.get('memmap')
    if memmap:
        logging.info('Memory mapping input data')

    baseline_file = stats_config.get('baseline_ids')
    if baseline_file:
        baseline_file = config_path.parent / baseline_file

    mutant_file = stats_config.get('mutant_ids')
    if mutant_file:
        mutant_file = config_path.parent / mutant_file

    # Run each data class through the pipeline.
    for stats_type in stats_config['stats_types']:

        logzero.logfile(str(master_log_file))
        logging.info(f"---Doing {stats_type} analysis---")
        
        gc.collect()
        
        # load the required stats object and data loader
        loader_class = DataLoader.factory(stats_type)

        loader = loader_class(wt_dir, mut_dir, mask, stats_config, label_info_file, lines_to_process=lines_to_process,
                              baseline_file=baseline_file, mutant_file=mutant_file, memmap=memmap, treatment_dir=treatment_dir, interaction_dir=interaction_dir)

        # Only affects organ vol loader.
        if not stats_config.get('normalise_organ_vol_to_mask'):
            loader.norm_to_mask_volume_on = False

        if loader_class == JacobianDataLoader:
            if stats_config.get('use_log_jacobians') is False:
                loader.data_folder_name = 'jacobians'
        # Currently only the intensity stats get normalised
        loader.normaliser = Normaliser.factory(stats_config.get('normalise'), stats_type)  # move this into subclass

        logging.info("Start iterate through lines")
        common.logMemoryUsageInfo()
  
        # Use a different iterator if doing a two-way analysis
        if stats_config['two_way']:
            line_iterator = loader.two_way_iterator()
        else:
            line_iterator = loader.line_iterator()

        line_input_data = None
 
        while True:
            try:
                line_input_data = next(line_iterator)
                logging.info(f"Data for line {line_input_data.line} loaded")
                common.logMemoryUsageInfo()
                
                line_id = line_input_data.line
      
                line_stats_out_dir = out_dir / line_id / stats_type
      
                line_stats_out_dir.mkdir(parents=True, exist_ok=True)
                line_log_file = line_stats_out_dir / f'{common.date_dhm()}_stats.log'
                logzero.logfile(str(line_log_file))
      
                logging.info(f"Processing line: {line_id}")
      
                stats_class = Stats.factory(stats_type)
                stats_obj = stats_class(line_input_data, stats_type, stats_config.get('use_staging', True), stats_config.get('two_way', False))
      
                stats_obj.stats_runner = linear_model.lm_r
                stats_obj.run_stats()
      
                logging.info('Statistical analysis finished.')
                common.logMemoryUsageInfo()
                
                logging.info('Writing results...')
                
                rw = ResultsWriter.factory(stats_type)
                writer = rw(stats_obj, mask, line_stats_out_dir, stats_type, label_map, label_info_file, stats_config.get('two_way', False))
                
                logging.info('Finished writing results.')
                common.logMemoryUsageInfo()
      
                if stats_config.get('invert_stats'):
                    if writer.line_heatmap:  # Organ vols will not have this
                        # TODO: find a sensible way to get the path to the invert.yaml
                        # get the invert configs for each specimen in the line
                        logging.info('Writing heatmaps...')
                        logging.info('Propagating the heatmaps back onto the input images')
                        line_heatmap = writer.line_heatmap
                        line_reg_dir = mut_dir / 'output' / line_id
                        invert_heatmaps(line_heatmap, line_stats_out_dir, line_reg_dir, line_input_data)
                        logging.info('Finished writing heatmaps.')
 
                logging.info(f"Finished processing line: {line_id} - All done")                  
                common.logMemoryUsageInfo()
                               
            except StopIteration:
                if line_input_data is not None:
                    logging.info("Finished iterating through lines")
                    line_input_data.cleanup()
                    common.logMemoryUsageInfo()
                break
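
For orientation, a hedged sketch of a minimal stats config that run() could consume; every key below is read by the function above, but the file names are hypothetical:

import toml

stats_config = {
    'stats_types': ['intensity', 'jacobians', 'organ_volumes'],
    'mask': 'stats_mask.nrrd',
    'label_map': 'label_map.nrrd',
    'label_info': 'label_info.csv',
    'baseline_ids': 'baseline_ids.csv',  # optional
    'two_way': False,
    'use_staging': True,
    'invert_stats': True,
    'memmap': False,
}
print(toml.dumps(stats_config))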
Example 7
    def __init__(self, config: Union[Path, Dict], cfg_path: Path=None, no_validate=False):
        """
        Parameters
        ----------
        config
            path to the lama config file
            or
            config dictionary
        cfg_path
            Used for testing. If we want to pass in a dict rather than a path, we will also need the path of the
            project directory, which is normally the config's parent directory

        Raises
        ------
        OSError or subclasses thereof if config file cannot be opened
        """
        if isinstance(config, dict):
            if cfg_path is None:
                raise ValueError("Please supply a project root path")
            self.config = config
            config_path = cfg_path
        elif isinstance(config, Path):
            self.config = common.cfg_load(config)
            config_path = config
        else:
            raise ValueError("config must me a Path or Dict")
        self.config_path = Path(config_path)

        # The variable names mapped to the actual names of output directories
        # If the value is a string, it will be created in the output_dir
        # If the value is a tuple [0] is the folder name and the rest are parent folders
        self.output_path_names = OrderedDict({
            # output_dir must always be 'output' as some other modules depend upon this
            # Must add a way to enforce this, as it can be overridden in the config at the moment
            'output_dir': 'output',
            'target_folder': 'target',
            'qc_dir': 'qc',
            'input_image_histograms': ('input_image_histograms', 'qc'),
            'metric_charts_dir': ('metric_charts', 'qc'),
            'registered_midslice_dir': ('registered_midslices', 'qc'),
            'inverted_label_overlay_dir': ('inverted_label_overlay', 'qc'),
            'cyan_red_dir': ('cyan_red_overlay', 'qc'),
            'average_folder': 'averages',
            'deformations': 'deformations',
            'jacobians': 'jacobians',
            'log_jacobians': 'log_jacobians',
            'jacmat': 'jacobian_matrices',
            'glcm_dir': 'glcms',
            'root_reg_dir': 'registrations',
            'inverted_transforms': 'inverted_transforms',
            'inverted_labels': 'inverted_labels',
            'inverted_stats_masks': 'inverted_stats_masks',
            'organ_vol_result_csv': common.ORGAN_VOLUME_CSV_FILE,
            'additional_seg_dir': 'additional_seg'

        })

        # Options in the config that map to files that can be present in the target folder
        self.target_names = (
            'fixed_mask',
            'stats_mask',
            'fixed_volume',
            'label_map',
            'label_info'
        )

        self.input_options = {
            # Config parameters to be validated (non-elastix related parameters)
            # parameter: ([options...], default)
            # Options can be types to check against or functions that return True if the value is valid
            'global_elastix_params': ('dict', 'required'),
            'registration_stage_params': ('dict', 'required'),
            'no_qc': ('bool', False),
            'threads': ('int', 4),
            'filetype': ('func', self.validate_filetype),
            'voxel_size': ('float', 14.0),
            'generate_new_target_each_stage': ('bool', False),
            'skip_transform_inversion': ('bool', False),
            'pairwise_registration': ('bool', False),
            'generate_deformation_fields': ('dict', None),
            'staging': ('func', self.validate_staging),
            'data_type': (['uint8', 'int8', 'int16', 'uint16', 'float32'], 'uint8'),
            'glcm': ('bool', False),
            'config_version': ('float', 1.1),
            'stage_targets': (Path, False),
            'fix_folding': (bool, False),
            # 'inverse_transform_method': (['invert_transform', 'reverse_registration'], 'invert_transform')
            'label_propagation': (['invert_transform', 'reverse_registration'], 'reverse_registration'),
            'skip_forward_registration': (bool, False),
            'seg_plugin_dir': (Path, None),

            # The following options are used for saving disk space
            'write_deformation_vectors': (bool, False),
            'delete_inverted_transforms': (bool, False),
            'write_raw_jacobians': (bool, True),
            'write_log_jacobians': (bool, True),
        }

        # The paths to each stage output dir: stage_id: Path
        self.stage_dirs = OrderedDict()

        self.all_keys = list(self.output_path_names.keys()) + list(self.target_names) + list(self.input_options.keys())

        # options is where the final options (either default or from config) are stored.
        # Paths from config or default will have been resolved relative to the config directory
        self.options = {}

        self.config_dir = config_path.parent

        # Check if there are any unknown options in the config in order to spot typos
        if no_validate:
            return

        self.check_for_unknown_options()

        self.convert_image_pyramid()

        self.pairwise_check()

        self.check_paths()

        self.check_options()

        self.check_images()

        self.resolve_output_paths()

        self.check_stages()

        self.check_propagation_options()

        self.check_problematic_elx_params()
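
A sketch of how the output_path_names mapping could resolve to real paths, following the comment above (a string lands in output_dir; a tuple's first item is the folder and the rest its parents). This mirrors, but is not, the real resolve_output_paths:

from pathlib import Path

def resolve_sketch(config_dir: Path, names: dict) -> dict:
    root = config_dir / names['output_dir']
    out = {'output_dir': root}
    for key, val in names.items():
        if key == 'output_dir':
            continue
        if isinstance(val, tuple):
            folder, *parents = val
            out[key] = root.joinpath(*reversed(parents), folder)
        else:
            out[key] = root / val
    return out

# e.g. 'metric_charts_dir': ('metric_charts', 'qc') -> <config_dir>/output/qc/metric_charts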
Example 8
def lama_job_runner(config_path: Path,
                    root_directory: Path,
                    make_job_file: bool = False):
    """

    Parameters
    ----------
    config_path
        path to registration config file
    root_directory
        path to root directory. The folder names from job_file.dir will be appended to this path to resolve project directories
    make_job_file
        if True, just make the job_file that other instances can consume

    Notes
    -----
    This function uses a SoftFileLock for locking the job_file csv to prevent multiple instances of this code from
    processing the same line or specimen. A SoftFileLock works by creating a lock file, and the presence of this file
    prevents other instances from accessing it. We don't use FileLock (although this is more robust) as it's not
    supported on nfs file systems. The advantage of SoftFileLock is you can create a lock file manually if
    you want to edit a job file manually while job_runner is running (make sure to delete after editing).

    If this script terminates unexpectedly while it has a lock on the file, it will not be released and the file
    remains. Therefore before running this script, ensure no previous lock file is hanging around.
    """

    if not config_path.is_file():
        raise FileNotFoundError(f"can't find config file {config_path}")

    root_directory = root_directory.resolve()

    job_file = root_directory / JOBFILE_NAME
    lock_file = job_file.with_suffix('.lock')
    lock = SoftFileLock(lock_file)
    # init_file = root_directory / 'init'

    HN = socket.gethostname()

    if make_job_file:

        # Delete any lockfile and job_file that might be present from previous runs.
        if job_file.is_file():
            os.remove(job_file)

        if lock_file.is_file():
            os.remove(lock_file)

        try:
            with lock.acquire(timeout=1):
                logging.info('Making job list file')
                make_jobs_file(job_file, root_directory)
                logging.info(
                    'Job file created! You can now run job_runner from multiple machines'
                )
                return

        except Timeout:
            print(
                f"Make sure lock file: {lock_file} is not present on running first instance"
            )
            sys.exit()

    config_name = config_path.name

    while True:

        try:
            with lock.acquire(timeout=60):

                # Create a lock then read jobs and add status to job file to ensure job is run once only.
                df_jobs = pd.read_csv(job_file, index_col=0)

                # Get an unfinished job
                jobs_to_do = df_jobs[df_jobs['status'] == 'to_run']

                if len(jobs_to_do) < 1:
                    logging.info("No more jobs left on jobs list")
                    break

                indx = jobs_to_do.index[0]

                vol = root_directory / (jobs_to_do.at[indx, 'job'])

                df_jobs.at[indx, 'status'] = 'running'

                df_jobs.at[indx, 'start_time'] = datetime.now().strftime(
                    '%Y-%m-%d %H:%M:%S')

                df_jobs.at[indx, 'host'] = socket.gethostname()

                df_jobs.to_csv(job_file)

                # Make a project directory for the specimen
                # vol.parent should be the line name
                # vol.stem is the specimen name minus the extension
                spec_root_dir = root_directory / 'output' / vol.parent.name / vol.stem
                spec_input_dir = spec_root_dir / 'inputs'
                spec_input_dir.mkdir(exist_ok=True, parents=True)
                spec_out_dir = spec_root_dir / 'output'
                spec_out_dir.mkdir(exist_ok=True, parents=True)
                shutil.copy(vol, spec_input_dir)

                # Copy the config into the project directory
                dest_config_path = spec_root_dir / config_name

                if dest_config_path.is_file():
                    os.remove(dest_config_path)

                shutil.copy(config_path, dest_config_path)

                # rename the target_folder now we've moved the config
                c = cfg_load(dest_config_path)

                target_folder = config_path.parent / c.get('target_folder')
                # Can't seem to get this to work with pathlib
                target_folder_relpath = os.path.relpath(
                    target_folder, str(dest_config_path.parent))
                c['target_folder'] = target_folder_relpath

                with open(dest_config_path, 'w') as fh:
                    fh.write(toml.dumps(c))

        except Timeout:
            sys.exit('Timed out: ' + socket.gethostname())

        try:
            print(f'debug {HN}, {linenum()}')
            print(f'trying {vol.name}')
            run_lama.run(dest_config_path)

        except LamaConfigError as lce:
            status = 'config_error'
            logging.exception(f'There is a problem with the config\n{lce}')
            sys.exit()

        except Exception as e:
            if e.__class__.__name__ == 'KeyboardInterrupt':
                logging.info('terminating')
                sys.exit('Exiting')

            status = 'failed'
            logging.exception(e)

        else:
            status = 'complete'

        finally:
            with lock:
                df_jobs = pd.read_csv(job_file, index_col=0)
                df_jobs.at[indx, 'status'] = status
                df_jobs.at[indx, 'end_time'] = datetime.now().strftime(
                    '%Y-%m-%d %H:%M:%S')
                df_jobs.to_csv(job_file)
    print('Exiting job_runner')
    return True
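
A minimal sketch of the locking pattern described in the notes above, using the same SoftFileLock/Timeout names the function relies on; the job-claiming logic itself is elided:

from filelock import SoftFileLock, Timeout

lock = SoftFileLock('jobs.csv.lock')
try:
    with lock.acquire(timeout=60):
        pass  # read the job csv, claim one row, write it back
except Timeout:
    raise SystemExit('Lock not acquired - check for a stale .lock file')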
Example 9
def batch_invert_transform_parameters(config: Union[Path, LamaConfig],
                                      clobber=True, new_log:bool=False):
    """
    Create new elastix TransformParameter files that can then be used by transformix to invert labelmaps, stats etc

    Parameters
    ----------
    config
        path to the original reg pipeline config file, or an already-loaded LamaConfig

    clobber
        if True overwrite inverted parameters present

    new_log:
        Whether to create a new log file. If called from another module, logging may happen there
    """
    common.test_installation('elastix')

    if isinstance(config, (Path, str)):
        config = LamaConfig(config)

    threads = str(config['threads'])

    if new_log:
        common.init_logging(config / 'invert_transforms.log')

    reg_dirs = get_reg_dirs(config)

    # Get the image basenames from the first stage registration folder (usually rigid)
    # ignore images in non-relevant folders that may be present
    volume_names = [x.stem for x in common.get_file_paths(reg_dirs[0], ignore_folders=[RESOLUTION_IMGS_DIR, IMG_PYRAMID_DIR])]

    inv_outdir = config.mkdir('inverted_transforms')

    stages_to_invert = defaultdict(list)

    jobs: List[Dict] = []

    reg_stage_dir: Path

    for i, vol_id in enumerate(volume_names):

        for reg_stage_dir in reg_dirs:

            if not reg_stage_dir.is_dir():
                logging.error('cannot find {}'.format(reg_stage_dir))
                raise FileNotFoundError(f'Cannot find registration dir {reg_stage_dir}')

            inv_stage_dir = inv_outdir / reg_stage_dir.name

            specimen_stage_reg_dir = reg_stage_dir / vol_id
            specimen_stage_inversion_dir = inv_stage_dir / vol_id

            transform_file = common.getfile_startswith(specimen_stage_reg_dir, ELX_TRANSFORM_NAME)
            parameter_file = common.getfile_startswith(reg_stage_dir, ELX_PARAM_PREFIX)

            # Create the folder to put the specimen inversion parameter files in.
            inv_stage_dir.mkdir(exist_ok=True)

            # Add the stage to the inversion order config (in reverse order), if not already.
            if reg_stage_dir.name not in stages_to_invert['label_propagation_order']:
                stages_to_invert['label_propagation_order'].insert(0, reg_stage_dir.name)

            if clobber:
                common.mkdir_force(specimen_stage_inversion_dir)  # Overwrite any inversion files that exist for a single specimen

            # Each registration directory contains a metadata file, which contains the relative path to the fixed volume
            reg_metadata = cfg_load(specimen_stage_reg_dir / common.INDV_REG_METADATA)
            fixed_volume = (specimen_stage_reg_dir / reg_metadata['fixed_vol']).resolve()

            # Invert the Transform parameters with options for normal image inversion

            job = {
                'specimen_stage_inversion_dir': specimen_stage_inversion_dir,
                'parameter_file': abspath(parameter_file),
                'transform_file': transform_file,
                'fixed_volume': fixed_volume,
                'param_file_output_name': 'inversion_parameters.txt',
                'image_replacements': IMAGE_REPLACEMENTS,
                'label_replacements': LABEL_REPLACEMENTS,
                'image_transform_file': PROPAGATE_IMAGE_TRANSFORM,
                'label_transform_file': PROPAGATE_LABEL_TRANFORM,
                'clobber': clobber,
                'threads': threads
            }

            jobs.append(job)

    # By running each inversion job (a single job per registration stage) in a pool we can speed things up a bit.
    # If we can get multithreaded inversion in elastix we can remove this python multithreading.
    pool = Pool(8)
    try:
        pool.map(_invert_transform_parameters, jobs)

    except KeyboardInterrupt:
        print('terminating inversion')
        pool.terminate()
        pool.join()

    # TODO: Should we replace the need for this invert.yaml?
    reg_dir = Path(os.path.relpath(reg_stage_dir, inv_outdir))
    stages_to_invert['registration_directory'] = str(reg_dir)  # Doc why we need this
    # Create a yaml config file so that inversions can be run separately
    invert_config = config['inverted_transforms'] / PROPAGATE_CONFIG

    with open(invert_config, 'w') as yf:
        yf.write(yaml.dump(dict(stages_to_invert), default_flow_style=False))
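
A small sketch of the reverse-order bookkeeping above: stages are visited in registration order but inserted at the front of the list, so the propagation config lists them last-first. Stage names are hypothetical:

from collections import defaultdict
import yaml

stages_to_invert = defaultdict(list)
for stage_name in ['rigid', 'affine', 'deformable']:
    if stage_name not in stages_to_invert['label_propagation_order']:
        stages_to_invert['label_propagation_order'].insert(0, stage_name)
print(yaml.dump(dict(stages_to_invert), default_flow_style=False))
# prints label_propagation_order as: deformable, affine, rigid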
Example 10
def batch_invert_transform_parameters(config: Union[str, LamaConfig],
                                      clobber=True,
                                      new_log: bool = False):
    """
    Create new elastix TransformParameter files that can then be used by transformix to invert labelmaps, stats etc

    Parameters
    ----------
    config
        path to the original reg pipeline config file, or an already-loaded LamaConfig

    clobber
        if True overwrite inverted parameters present

    new_log:
        Whether to create a new log file. If called from another module, logging may happen there
    """
    common.test_installation('elastix')

    if isinstance(config, (str, Path)):
        config = LamaConfig(config)

    threads = str(config['threads'])

    if new_log:
        common.init_logging(config / 'invert_transforms.log')

    reg_dirs = get_reg_dirs(config)

    # Get the image basenames from the first stage registration folder (usually rigid)
    # ignore images in non-relevant folders that may be present
    volume_names = [
        x.stem for x in common.get_file_paths(reg_dirs[0],
                                              ignore_folder=IGNORE_FOLDER)
    ]

    inv_outdir = config.mkdir('inverted_transforms')

    stages_to_invert = defaultdict(list)

    jobs: List[Dict] = []

    reg_stage_dir: Path

    for i, vol_id in enumerate(volume_names):

        label_replacements = {
            'FinalBSplineInterpolationOrder': '0',
            'FixedInternalImagePixelType': 'short',
            'MovingInternalImagePixelType': 'short',
            'ResultImagePixelType': 'unsigned char',
            'WriteTransformParametersEachResolution': 'false',
            'WriteResultImageAfterEachResolution': 'false'
        }

        image_replacements = {
            'FinalBSplineInterpolationOrder': '3',
            'FixedInternalImagePixelType': 'float',
            'MovingInternalImagePixelType': 'float',
            'ResultImagePixelType': 'float',
            'WriteTransformParametersEachResolution': 'false',
            'WriteResultImageAfterEachResolution': 'false'
        }

        for reg_stage_dir in reg_dirs:

            if not reg_stage_dir.is_dir():
                logging.error('cannot find {}'.format(reg_stage_dir))
                raise FileNotFoundError(
                    f'Cannot find registration dir {reg_stage_dir}')

            inv_stage_dir = inv_outdir / reg_stage_dir.name

            specimen_stage_reg_dir = reg_stage_dir / vol_id
            specimen_stage_inversion_dir = inv_stage_dir / vol_id

            transform_file = common.getfile_startswith(specimen_stage_reg_dir,
                                                       ELX_TRANSFORM_PREFIX)
            parameter_file = common.getfile_startswith(reg_stage_dir,
                                                       ELX_PARAM_PREFIX)

            # Create the folder to put the specimen inversion parameter files in.
            inv_stage_dir.mkdir(exist_ok=True)

            # Add the stage to the inversion order config (in reverse order), if not already.
            if reg_stage_dir.name not in stages_to_invert['inversion_order']:
                stages_to_invert['inversion_order'].insert(
                    0, reg_stage_dir.name)

            if clobber:
                # Overwrite any inversion files that exist for a single specimen
                common.mkdir_force(specimen_stage_inversion_dir)

            # Each registration directory contains a metadata file, which contains the relative path to the fixed volume
            reg_metadata = cfg_load(specimen_stage_reg_dir /
                                    common.INDV_REG_METADATA)
            fixed_volume = (specimen_stage_reg_dir /
                            reg_metadata['fixed_vol']).resolve()

            # Invert the Transform parameters with options for normal image inversion

            job = {
                'specimen_stage_inversion_dir': specimen_stage_inversion_dir,
                'parameter_file': abspath(parameter_file),
                'transform_file': transform_file,
                'fixed_volume': fixed_volume,
                'param_file_output_name': 'inversion_parameters.txt',
                'image_replacements': image_replacements,
                'label_replacements': label_replacements,
                'image_transform_file': IMAGE_INVERTED_TRANSFORM,
                'label_transform_file': LABEL_INVERTED_TRANFORM,
                'clobber': clobber,
                'threads': threads
            }

            jobs.append(job)

    # Run the inversion jobs. Currently using only one thread, as elastix now
    # seems to use multiple threads for the inversions itself.

    logging.info('inverting with {} threads: '.format(threads))
    # 17/09/18 If we can get multithreaded inversion in elastix 4.9 we can remove the python multithreading
    pool = Pool(1)
    try:
        pool.map(_invert_transform_parameters, jobs)

    except KeyboardInterrupt:
        print('terminating inversion')
        pool.terminate()
        pool.join()

    # TODO: Should we replace the need for this invert.yaml?
    reg_dir = Path(os.path.relpath(reg_stage_dir, inv_outdir))
    stages_to_invert['registration_directory'] = str(
        reg_dir)  # Doc why we need this
    # Create a yaml config file so that inversions can be run separately
    invert_config = config['inverted_transforms'] / INVERT_CONFIG

    with open(invert_config, 'w') as yf:
        yf.write(yaml.dump(dict(stages_to_invert), default_flow_style=False))
Example 11
    def __init__(self, config_path: Path):
        """
        Parameters
        ----------
        config_path
            path to the lama config file

        Raises
        ------
        OSError or subclasses thereof if config file cannot be opened
        """

        self.config_path = config_path
        self.config = common.cfg_load(config_path)

        # The variable names mapped to the actual names of output directories
        # If the value is a string, it will be created in the output_dir
        # If the value is a tuple [0] is the folder name and the rest are parent folders
        self.output_path_names = OrderedDict({
            # output_dir must always be 'output' as some other modules depend upon this
            # Must add a way to enforce this, as it can be overridden in the config at the moment
            'output_dir': 'output',
            'target_folder': 'target',
            'qc_dir': 'qc',
            'input_image_histograms': ('input_image_histograms', 'qc'),
            'metric_charts_dir': ('metric_charts', 'qc'),
            'registered_midslice_dir': ('registered_midslices', 'qc'),
            'inverted_label_overlay_dir': ('inverted_label_overlay', 'qc'),
            'cyan_red_dir': ('cyan_red_overlay', 'qc'),
            'average_folder': 'averages',
            'deformations': 'deformations',
            'jacobians': 'jacobians',
            'log_jacobians': 'log_jacobians',
            'jacmat': 'jacobian_matrices',
            'glcm_dir': 'glcms',
            'root_reg_dir': 'registrations',
            'inverted_transforms': 'inverted_transforms',
            'inverted_labels': 'inverted_labels',
            'inverted_stats_masks': 'inverted_stats_masks',
            'organ_vol_result_csv': common.ORGAN_VOLUME_CSV_FILE
        })

        # Options in the config that map to files that can be present in the target folder
        self.target_names = (
            'fixed_mask',
            'stats_mask',
            'fixed_volume',
            'label_map',
            'label_names'
        )

        self.input_options = {

            # parameter: ([options...], default)
            # Options can be types or functions
            'global_elastix_params': ('dict', 'required'),
            'registration_stage_params': ('dict', 'required'),
            'no_qc': ('bool', False),
            'threads': ('int', 4),
            'filetype': ('func', self.validate_filetype),
            'voxel_size': ('float', 14.0),
            'generate_new_target_each_stage': ('bool', False),
            'skip_transform_inversion': ('bool', False),
            'pairwise_registration': ('bool', False),
            'generate_deformation_fields': ('dict', None),
            'skip_deformation_fields': ('bool', True),
            'staging': ('func', self.validate_staging),
            'data_type': (['uint8', 'int8', 'int16', 'uint16', 'float32'], 'uint8'),
            'glcm': ('bool', False),
            'config_version': ('float', 1.1)
        }

        # The paths to each stage output dir: stage_id: Path
        self.stage_dirs = OrderedDict()

        self.all_keys = list(self.output_path_names.keys()) + list(self.target_names) + list(self.input_options.keys())

        # options is where the final options (either default or from config) are stored.
        # Paths from config or default will have been resolved relative to the config directory
        self.options = {}

        self.config_dir = config_path.parent

        # Check if there are any unknown options in the config in order to spot typos
        self.check_for_unknown_options()

        self.convert_image_pyramid()

        self.pairwise_check()

        self.check_paths()

        self.check_options()

        # self.check_images()

        self.resolve_output_paths()

        self.check_stages()
Example 12
def run_registration_schedule(config: LamaConfig,
                              first_stage_only=False) -> Path:
    """
    Run the registrations specified in the config file

    Parameters
    ----------
    config: Parsed and validated lama config
    first_stage_only: If True, just do the initial rigid stage

    Returns
    -------
    The path to the final registered images
    """
    st = config['stage_targets']
    if st:
        with open(st, 'r') as stfh:
            stage_targets = cfg_load(stfh)['targets']
        if len(config['registration_stage_params']) != len(stage_targets):
            logging.error(f'Len stage targets: {len(stage_targets)}')
            logging.error(
                f'Len reg stages: {len(config["registration_stage_params"])}')
            raise LamaConfigError(
                "restage len != number of registration stages")

    # Create a folder to store QC metric charts so we can keep an eye on the registration process
    if not config['no_qc']:
        qc_metric_dir = config['metric_charts_dir']

    elastix_stage_parameters = generate_elx_parameters(
        config, do_pairwise=config['pairwise_registration'])
    regenerate_target = config['generate_new_target_each_stage']

    if regenerate_target and st:
        raise LamaConfigError(
            'cannot have regenerate_target and stage_targets')

    if regenerate_target:
        logging.info(
            'Creating new target each stage for population average creation')
    else:
        logging.info('Using same target for each stage')

    # Set the moving volume dir and the fixed image for the first stage
    moving_vols_dir = config['inputs']

    # Set the fixed volume up for the first stage. This will change each stage if doing population average
    if st:
        fixed_vol = stage_targets[0]
    else:
        fixed_vol = config['fixed_volume']

    for i, reg_stage in enumerate(config['registration_stage_params']):

        tform_type = reg_stage['elastix_parameters']['Transform']
        euler_stage = tform_type == 'EulerTransform'
        # affine_similarity_stage = True if tform_type in ['AffineTransform', 'SimilarityTransform'] else False

        if config['pairwise_registration']:
            if not euler_stage:
                logging.info('doing pairwise registration')
                reg_method = PairwiseBasedRegistration
            else:
                reg_method = TargetBasedRegistration
                logging.info(
                    'using target-based registration for initial rigid stage of pairwise registrations'
                )

        else:
            logging.info('using target-based registration')
            reg_method = TargetBasedRegistration

        #  Make the stage output dir
        stage_id = reg_stage['stage_id']
        stage_dir = config.stage_dirs[stage_id]

        common.mkdir_force(stage_dir)

        logging.info("### Current registration step: {} ###".format(stage_id))

        # Make the elastix parameter file for this stage
        elxparam = elastix_stage_parameters[stage_id]
        elxparam_path = join(stage_dir, ELX_PARAM_PREFIX + stage_id + '.txt')

        with open(elxparam_path, 'w') as fh:
            if elxparam:  # Not sure why I put this here
                fh.write(elxparam)

        # TODO: shall we keep the fixed mask throughout? I think we should in the next release

        # If we are doing target-based phenotype detection, we can use the fixed mask for every stage
        if not config['generate_new_target_each_stage']:
            fixed_mask = config['fixed_mask']
        else:
            fixed_mask = None

        # Do the registrations
        registrator = reg_method(elxparam_path, moving_vols_dir, stage_dir,
                                 config['filetype'], config['threads'],
                                 fixed_mask)

        if (not config['pairwise_registration']) or (
                config['pairwise_registration'] and euler_stage):
            registrator.set_target(fixed_vol)

        if reg_stage['elastix_parameters']['Transform'] == 'BSplineTransform':
            if config['fix_folding']:
                logging.info(f'Folding correction for stage {stage_id} set')
            registrator.fix_folding = config['fix_folding']  # Currently only works for TargetBasedRegistration

        registrator.run()  # Do the registrations for a single stage

        # Make average from the stage outputs
        if regenerate_target:
            average_path = join(config['average_folder'],
                                '{0}.{1}'.format(stage_id, config['filetype']))
            registrator.make_average(average_path)

        if not config['no_qc']:

            stage_metrics_dir = qc_metric_dir / stage_id
            common.mkdir_force(stage_metrics_dir)
            make_charts(stage_dir, stage_metrics_dir)

        # Setup the fixed and moving for the next stage, if there is one
        if i + 1 < len(config['registration_stage_params']):
            if regenerate_target:
                fixed_vol = average_path  # The average from the previous step
            elif st:
                fixed_vol = stage_targets[i + 1]

            moving_vols_dir = stage_dir  # Set the output of the current stage to be the input of the next

        if first_stage_only:
            return stage_dir

    logging.info("### Registration finished ###")

    return stage_dir
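
A hedged sketch of the registration_stage_params structure that run_registration_schedule iterates over; the stage ids and transforms follow the usual rigid -> affine -> deformable pattern, but the values here are hypothetical:

registration_stage_params = [
    {'stage_id': 'rigid',
     'elastix_parameters': {'Transform': 'EulerTransform'}},
    {'stage_id': 'affine',
     'elastix_parameters': {'Transform': 'AffineTransform'}},
    {'stage_id': 'deformable',
     'elastix_parameters': {'Transform': 'BSplineTransform'}},
]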
Example 13
def secondary_segmentation(config: LamaConfig):
    """
    Use user-added scripts to segment/cleanup organs

    Parameters
    ----------
    config

    Returns
    -------

    """

    plugin_dir = config.config_dir / config['seg_plugin_dir']

    if not plugin_dir.is_dir():
        logging.error(f'Cannot find plugin directory: {plugin_dir}')
        return

    # Find the directories containing the segmentations
    # Get the final inversion stage
    invert_config = config['inverted_transforms'] / PROPAGATE_CONFIG
    segmentation_dir = cfg_load(invert_config)['label_propagation_order'][
        -1]  # rename to segmentation stage
    inverted_label_dir = config['inverted_labels'] / segmentation_dir
    initial_segmentation_path = next(inverted_label_dir.glob('**/*.nrrd'))

    first_reg_dir = config['root_reg_dir'] / config[
        'registration_stage_params'][0]['stage_id']  # usually rigid
    image_to_segment = next(first_reg_dir.glob('**/*.nrrd'))

    segmentations = []

    for plugin_src in [
            x for x in plugin_dir.iterdir()
            if str(x).endswith('.py') and x.name != 'plugin_interface.py'
    ]:

        # catch all exceptions as we don't want a plugin crashing the pipeline
        try:
            spec = importlib.util.spec_from_file_location(
                plugin_src.stem, str(plugin_src))
            plugin = importlib.util.module_from_spec(spec)
            spec.loader.exec_module(plugin)

            new_segmentation = plugin.run(image_to_segment,
                                          initial_segmentation_path)

        except Exception as e:
            logging.error(f'Plugin {plugin_src} failed\n{e}')
        else:
            segmentations.append(new_segmentation)

    if not segmentations:
        logging.error(f'No segmentations returned from {plugin_src.name}')
        return

    # Merge all the segmentations into a single label map. If there are any overlaps, the plugin called last will have
    # priority

    seg = None

    for s in segmentations:
        if seg is None:  # seg is a numpy array once set, so test against None explicitly
            seg = s
            continue
        seg[s != 0] = s[s != 0]

    additional_seg_dir = config.mkdir('additional_seg_dir')
    write_array(seg, additional_seg_dir /
                f'{config.config_dir.name}_additional_seg.nrrd'
                )  # TODO: include the specimen name
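
A hedged sketch of a plugin module that could live in seg_plugin_dir; the pipeline above only assumes a module-level run() that takes the image to segment and the initial segmentation path, and returns a label array:

import SimpleITK as sitk

def run(image_to_segment, initial_segmentation_path):
    # Start from the propagated labels and refine them against the image
    seg = sitk.GetArrayFromImage(sitk.ReadImage(str(initial_segmentation_path)))
    # ... refine or replace one organ's labels in seg here ...
    return seg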