def run(wt_dir: Path,
        mut_dir: Path,
        out_dir: Path,
        num_perms: int,
        label_info: Path = None,
        label_map_path: Path = None,
        line_fdr: float = 0.05,
        specimen_fdr: float = 0.2,
        normalise_to_whole_embryo: bool = True):
    """
    Run the permutation-based stats pipeline

    Collates the staging and organ volume data, generates the null and alternative
    p-value distributions, derives per-organ p-value thresholds from them, annotates
    lines and specimens and finally makes the plots. Everything is written beneath
    ``out_dir``.

    Parameters
    ----------
    wt_dir
        Root of the wild type registration output
        This should contain an 'output' folder that contains a single baseline folder that contains multiple
        specimen folders
    mut_dir
        Root of the mutant registration output
        This should contain 'output' folder that contains multiple mutant lines folder, each containing one or more
        mutant specimen folders
    out_dir
        Where to store the intermediate results of the permutation testing.
        Created (including parents) if it does not already exist
    num_perms
        number of permutations to do
    label_info
        if supplied, use it to annotate the results with label names. Also can be used to filter certain labels from
        the analysis using the 'no_analysis' column
    label_map_path
        passed on to ``annotate`` as ``label_map``
    line_fdr
        the FDR threshold at which to accept line level calls
    specimen_fdr
        the FDR threshold at which to accept specimen-level calls
    normalise_to_whole_embryo:
        Whether to divide each organ volume by whole embryo volume
    """
    np.random.seed(999)

    # Root directory for output. This has to exist before the log file and the
    # input data csv are written into it below.
    out_dir.mkdir(exist_ok=True, parents=True)

    init_logging(out_dir / 'stats.log')
    logging.info(common.git_log())
    logging.info(f'Running {__name__} with followng commands\n{common.command_line_agrs()}')

    # Collate all the staging and organ volume data
    wt_staging = get_staging_data(wt_dir)
    mut_staging = get_staging_data(mut_dir)

    wt_organ_vol = get_organ_volume_data(wt_dir)
    mut_organ_vol = get_organ_volume_data(mut_dir)

    data = prepare_data(wt_organ_vol,
                        wt_staging,
                        mut_organ_vol,
                        mut_staging,
                        label_meta=label_info,
                        normalise_to_whole_embryo=normalise_to_whole_embryo)

    # Keep a record of the input data used in the analysis
    data.to_csv(out_dir / 'input_data.csv')

    # Keep raw data for plotting
    raw_wt_vols = wt_organ_vol.copy()

    # Make directory to store distributions and thresholds
    dists_out = out_dir / 'distributions'
    dists_out.mkdir(exist_ok=True)

    # Get the null distributions
    line_null, specimen_null, null_ids = distributions.null(data, num_perms)

    with open(dists_out / 'null_ids.yaml', 'w') as fh:
        yaml.dump(null_ids, fh)

    null_line_pvals_file = dists_out / 'null_line_dist_pvalues.csv'
    null_specimen_pvals_file = dists_out / 'null_specimen_dist_pvalues.csv'

    # Write the null distributions to file
    line_null.to_csv(null_line_pvals_file)
    specimen_null.to_csv(null_specimen_pvals_file)

    # Get the alternative distribution
    line_alt, spec_alt = distributions.alternative(data)

    line_alt_pvals_file = dists_out / 'alt_line_dist_pvalues.csv'
    spec_alt_pvals_file = dists_out / 'alt_specimen_dist_pvalues.csv'

    # Write the alternative distributions to file
    line_alt.to_csv(line_alt_pvals_file)
    spec_alt.to_csv(spec_alt_pvals_file)

    # Per-organ p-value thresholds derived from the null and alternative distributions
    line_organ_thresholds = p_thresholds.get_thresholds(line_null, line_alt)
    specimen_organ_thresholds = p_thresholds.get_thresholds(specimen_null, spec_alt)

    line_thresholds_path = dists_out / 'line_organ_p_thresholds.csv'
    spec_thresholds_path = dists_out / 'specimen_organ_p_thresholds.csv'

    line_organ_thresholds.to_csv(line_thresholds_path)
    specimen_organ_thresholds.to_csv(spec_thresholds_path)

    logging.info('Annotating lines')

    lines_root_dir = out_dir / 'lines'
    lines_root_dir.mkdir(exist_ok=True)

    # Annotate lines
    logging.info(f"Annotating lines, using a FDR threshold of {line_fdr}")
    annotate(line_organ_thresholds,
             line_alt,
             lines_root_dir,
             label_info=label_info,
             label_map=label_map_path,
             write_thresholded_inv_labels=True,
             fdr_threshold=line_fdr)

    # Annotate specimens
    logging.info(f"Annotating specimens, using a FDR threshold of {specimen_fdr}")
    annotate(specimen_organ_thresholds,
             spec_alt,
             lines_root_dir,
             line_level=False,
             label_info=label_info,
             label_map=label_map_path,
             fdr_threshold=specimen_fdr)

    mut_dir_ = mut_dir / 'output'
    make_plots(mut_dir_, raw_wt_vols, wt_staging, label_info, lines_root_dir)

    # Plots of the null/alternative p-value distributions with their thresholds
    dist_plot_root = out_dir / 'distribution_plots'

    line_plot_dir = dist_plot_root / 'line_level'
    line_plot_dir.mkdir(parents=True, exist_ok=True)
    pvalue_dist_plots(line_null, line_alt, line_organ_thresholds, line_plot_dir)

    specimen_plot_dir = dist_plot_root / 'specimen_level'
    specimen_plot_dir.mkdir(parents=True, exist_ok=True)
    # The 'line' column is dropped from the specimen-level results before plotting
    pvalue_dist_plots(specimen_null,
                      spec_alt.drop(columns=['line']),
                      specimen_organ_thresholds,
                      specimen_plot_dir)
def batch_invert_transform_parameters(config: Union[Path, LamaConfig],
                                      clobber=True,
                                      new_log: bool = False):
    """
    Create new elastix TransformParameter files that can then be used by transformix to invert labelmaps, stats etc

    One inversion job is built per specimen per registration stage and the jobs are run in a
    process pool. Afterwards a yaml file listing the stages (in reverse registration order) is
    written into the inverted transforms directory.

    Parameters
    ----------
    config
        path to original reg pipeline config file, or an already loaded LamaConfig
    clobber
        if True overwrite inverted parameters present
    new_log:
        Whether to create a new log file. If called from another module, logging may happen there

    Raises
    ------
    FileNotFoundError
        If one of the registration stage directories does not exist
    """
    common.test_installation('elastix')

    if isinstance(config, (Path, str)):
        config = LamaConfig(config)

    threads = str(config['threads'])

    if new_log:
        # NOTE(review): config is a LamaConfig at this point, so this relies on it supporting '/' - confirm
        common.init_logging(config / 'invert_transforms.log')

    reg_dirs = get_reg_dirs(config)

    # Get the image basenames from the first stage registration folder (usually rigid)
    # ignore images in non-relevant folders that may be present
    volume_names = [
        x.stem for x in common.get_file_paths(reg_dirs[0],
                                              ignore_folders=[RESOLUTION_IMGS_DIR, IMG_PYRAMID_DIR])
    ]

    inv_outdir = config.mkdir('inverted_transforms')

    stages_to_invert = defaultdict(list)

    jobs: List[Dict] = []

    reg_stage_dir: Path

    for vol_id in volume_names:

        for reg_stage_dir in reg_dirs:

            if not reg_stage_dir.is_dir():
                logging.error('cannot find {}'.format(reg_stage_dir))
                raise FileNotFoundError(f'Cannot find registration dir {reg_stage_dir}')

            inv_stage_dir = inv_outdir / reg_stage_dir.name

            specimen_stage_reg_dir = reg_stage_dir / vol_id
            specimen_stage_inversion_dir = inv_stage_dir / vol_id

            transform_file = common.getfile_startswith(specimen_stage_reg_dir, ELX_TRANSFORM_NAME)
            parameter_file = common.getfile_startswith(reg_stage_dir, ELX_PARAM_PREFIX)

            # Create the folder to put the specimen inversion parameter files in.
            inv_stage_dir.mkdir(exist_ok=True)

            # Add the stage to the inversion order config (in reverse order), if not already.
            if reg_stage_dir.name not in stages_to_invert['label_propagation_order']:
                stages_to_invert['label_propagation_order'].insert(0, reg_stage_dir.name)

            if clobber:
                # Overwrite any inversion file that exist for a single specimen
                common.mkdir_force(specimen_stage_inversion_dir)

            # Each registration directory contains a metadata file, which contains the relative path to the
            # fixed volume
            reg_metadata = cfg_load(specimen_stage_reg_dir / common.INDV_REG_METADATA)
            fixed_volume = (specimen_stage_reg_dir / reg_metadata['fixed_vol']).resolve()

            # Invert the Transform parameters with options for normal image inversion
            job = {
                'specimen_stage_inversion_dir': specimen_stage_inversion_dir,
                'parameter_file': abspath(parameter_file),
                'transform_file': transform_file,
                'fixed_volume': fixed_volume,
                'param_file_output_name': 'inversion_parameters.txt',
                'image_replacements': IMAGE_REPLACEMENTS,
                'label_replacements': LABEL_REPLACEMENTS,
                'image_transform_file': PROPAGATE_IMAGE_TRANSFORM,
                'label_transform_file': PROPAGATE_LABEL_TRANFORM,
                'clobber': clobber,
                'threads': threads
            }

            jobs.append(job)

    # By putting each inversion job (a single job per registration stage) in a pool we can speed things up a bit
    # If we can get multithreaded inversion in elastix we can remove this python multithreading
    # The context manager makes sure the worker processes are released once the map has finished
    with Pool(8) as pool:
        try:
            pool.map(_invert_transform_parameters, jobs)

        except KeyboardInterrupt:
            print('terminating inversion')
            pool.terminate()
            pool.join()

    # TODO: Should we replace the need for this invert.yaml?
    # reg_stage_dir still refers to the last stage visited by the loops above
    reg_dir = Path(os.path.relpath(reg_stage_dir, inv_outdir))
    stages_to_invert['registration_directory'] = str(reg_dir)  # Doc why we need this

    # Create a yaml config file so that inversions can be run separately
    invert_config = config['inverted_transforms'] / PROPAGATE_CONFIG

    with open(invert_config, 'w') as yf:
        yf.write(yaml.dump(dict(stages_to_invert), default_flow_style=False))
def batch_invert_transform_parameters(config: Union[str, LamaConfig],
                                      clobber=True,
                                      new_log: bool = False):
    """
    Create new elastix TransformParameter files that can then be used by transformix to invert labelmaps, stats etc

    One inversion job is built per specimen per registration stage. Afterwards a yaml file
    listing the stages (in reverse registration order) is written into the inverted
    transforms directory.

    Parameters
    ----------
    config
        path to original reg pipeline config file (str or Path), or an already loaded LamaConfig
    clobber
        if True overwrite inverted parameters present
    new_log:
        Whether to create a new log file. If called from another module, logging may happen there

    Raises
    ------
    FileNotFoundError
        If one of the registration stage directories does not exist
    """
    common.test_installation('elastix')

    # Accept str as well as Path: the signature advertises str, and a bare str would
    # otherwise reach config['threads'] below without being loaded.
    if isinstance(config, (Path, str)):
        config = LamaConfig(config)

    threads = str(config['threads'])

    if new_log:
        # NOTE(review): config is a LamaConfig at this point, so this relies on it supporting '/' - confirm
        common.init_logging(config / 'invert_transforms.log')

    reg_dirs = get_reg_dirs(config)

    # Get the image basenames from the first stage registration folder (usually rigid)
    # ignore images in non-relevant folders that may be present
    volume_names = [
        x.stem for x in common.get_file_paths(reg_dirs[0], ignore_folder=IGNORE_FOLDER)
    ]

    inv_outdir = config.mkdir('inverted_transforms')

    stages_to_invert = defaultdict(list)

    jobs: List[Dict] = []

    reg_stage_dir: Path

    # Elastix parameter overrides. These are the same for every job so are built once.
    label_replacements = {
        'FinalBSplineInterpolationOrder': '0',
        'FixedInternalImagePixelType': 'short',
        'MovingInternalImagePixelType': 'short',
        'ResultImagePixelType': 'unsigned char',
        'WriteTransformParametersEachResolution': 'false',
        'WriteResultImageAfterEachResolution': 'false'
    }

    image_replacements = {
        'FinalBSplineInterpolationOrder': '3',
        'FixedInternalImagePixelType': 'float',
        'MovingInternalImagePixelType': 'float',
        'ResultImagePixelType': 'float',
        'WriteTransformParametersEachResolution': 'false',
        'WriteResultImageAfterEachResolution': 'false'
    }

    for vol_id in volume_names:

        for reg_stage_dir in reg_dirs:

            if not reg_stage_dir.is_dir():
                logging.error('cannot find {}'.format(reg_stage_dir))
                raise FileNotFoundError(f'Cannot find registration dir {reg_stage_dir}')

            inv_stage_dir = inv_outdir / reg_stage_dir.name

            specimen_stage_reg_dir = reg_stage_dir / vol_id
            specimen_stage_inversion_dir = inv_stage_dir / vol_id

            transform_file = common.getfile_startswith(specimen_stage_reg_dir, ELX_TRANSFORM_PREFIX)
            parameter_file = common.getfile_startswith(reg_stage_dir, ELX_PARAM_PREFIX)

            # Create the folder to put the specimen inversion parameter files in.
            inv_stage_dir.mkdir(exist_ok=True)

            # Add the stage to the inversion order config (in reverse order), if not already.
            if reg_stage_dir.name not in stages_to_invert['inversion_order']:
                stages_to_invert['inversion_order'].insert(0, reg_stage_dir.name)

            if clobber:
                # Overwrite any inversion file that exist for a single specimen
                common.mkdir_force(specimen_stage_inversion_dir)

            # Each registration directory contains a metadata file, which contains the relative path to the
            # fixed volume
            reg_metadata = cfg_load(specimen_stage_reg_dir / common.INDV_REG_METADATA)
            fixed_volume = (specimen_stage_reg_dir / reg_metadata['fixed_vol']).resolve()

            # Invert the Transform parameters with options for normal image inversion
            job = {
                'specimen_stage_inversion_dir': specimen_stage_inversion_dir,
                'parameter_file': abspath(parameter_file),
                'transform_file': transform_file,
                'fixed_volume': fixed_volume,
                'param_file_output_name': 'inversion_parameters.txt',
                'image_replacements': image_replacements,
                'label_replacements': label_replacements,
                'image_transform_file': IMAGE_INVERTED_TRANSFORM,
                'label_transform_file': LABEL_INVERTED_TRANFORM,
                'clobber': clobber,
                'threads': threads
            }

            jobs.append(job)

    # Run the inversion jobs. Currently using only one worker as it seems that elastix now uses multiple threads
    # on the inversions
    logging.info('inverting with {} threads: '.format(threads))

    # 17/09/18 If we can get multithreaded inversion in elastix 4.9 we can remove the python multithreading
    # The context manager makes sure the worker process is released once the map has finished
    with Pool(1) as pool:
        try:
            pool.map(_invert_transform_parameters, jobs)

        except KeyboardInterrupt:
            print('terminating inversion')
            pool.terminate()
            pool.join()

    # TODO: Should we replace the need for this invert.yaml?
    # reg_stage_dir still refers to the last stage visited by the loops above
    reg_dir = Path(os.path.relpath(reg_stage_dir, inv_outdir))
    stages_to_invert['registration_directory'] = str(reg_dir)  # Doc why we need this

    # Create a yaml config file so that inversions can be run separately
    invert_config = config['inverted_transforms'] / INVERT_CONFIG

    with open(invert_config, 'w') as yf:
        yf.write(yaml.dump(dict(stages_to_invert), default_flow_style=False))
def run(configfile: Path):
    """
    This is the main function Lama script for generating data from registering volumes
    It reads in the config file, creates directories, and initialises the registration process.

    Looks for paths to inputs relative the directory containing the config file

    Parameters
    ----------
    configfile
        A toml config file

    Returns
    -------
    True once the pipeline has run to completion

    Raises
    ------
    OSError
        If the config file cannot be opened, or if the elastix installation test fails
    LamaConfigError
        If loading the config fails for any other reason
    """
    try:
        config = LamaConfig(configfile)
    except OSError as e:
        logging.error(f'Cannot open LAMA config file: {str(configfile)}\n{e}')
        raise
    except Exception as e:
        raise LamaConfigError(e) from e

    config.mkdir('output_dir')
    qc_dir = config.mkdir('qc_dir')
    config.mkdir('average_folder')
    config.mkdir('root_reg_dir')

    # TODO find the histogram batch code
    # if not config['no_qc']:
    #     input_histogram_dir = config.mkdir('input_image_histograms')
    #     make_histograms(config['inputs'], input_histogram_dir)

    logpath = config.config_path.parent / LOG_FILE  # Make log in same directory as config file

    common.init_logging(logpath)

    if not common.test_installation('elastix'):
        raise OSError('Make sure elastix is installed')

    # Catch ctr-c signals so we can write that to logs
    # signal.signal(signal.SIGTERM, common.service_shutdown)
    signal.signal(signal.SIGINT, common.service_shutdown)

    mem_monitor = MonitorMemory(Path(config['output_dir']).absolute())

    # From here on make sure the memory monitor is stopped, even if a pipeline step raises
    try:
        # Disable QC output?
        no_qc: bool = config['no_qc']

        logging.info(common.git_log())  # If running from a git repo, log the branch and commit

        # logging.info("Registration started")
        final_registration_dir = run_registration_schedule(config)

        make_deformations_at_different_scales(config)

        create_glcms(config, final_registration_dir)

        if config['skip_transform_inversion']:
            logging.info('Skipping inversion of transforms')
        else:
            logging.info('inverting transforms')
            batch_invert_transform_parameters(config)

            logging.info('inverting volumes')
            invert_volumes(config)

            # NOTE(review): organ volumes are assumed to need the inverted labels, hence
            # this sits inside the inversion branch - confirm
            if config['label_map']:
                generate_organ_volumes(config)

        if not generate_staging_data(config):
            logging.warning('No staging data generated')

        # Write out the names of the registration dirs in the order they were run
        with open(config['root_reg_dir'] / REG_DIR_ORDER, 'w') as fh:
            for reg_stage in config['registration_stage_params']:
                fh.write(f'{reg_stage["stage_id"]}\n')

        if not no_qc:
            # The overlay directory is only created when transforms have been inverted
            if not config['skip_transform_inversion']:
                config.mkdir('inverted_label_overlay_dir')

            # registered_midslice_dir = config.mkdir('registered_midslice_dir')

            make_qc_images(config.config_dir, config['fixed_volume'], qc_dir)
    finally:
        mem_monitor.stop()

    return True
def run(wt_dir: Path,
        mut_dir: Path,
        out_dir: Path,
        num_perms: int,
        label_info: Path = None,
        label_map_path: Path = None,
        line_fdr: float = 0.05,
        specimen_fdr: float = 0.2,
        normalise_to_whole_embryo: bool = True,
        qc_file: Path = None,
        voxel_size: float = 1.0):
    """
    Run the permutation-based stats pipeline

    Collates the staging and organ volume data, generates the null and alternative
    distributions, derives per-organ p-value thresholds from them, annotates lines and
    specimens, writes the hits to csv and finally makes the plots. Everything is written
    beneath ``out_dir``.

    Parameters
    ----------
    wt_dir
        Root of the wild type registration output
        This should contain an 'output' folder that contains a single baseline folder that contains multiple
        specimen folders
    mut_dir
        Root of the mutant registration output
        This should contain 'output' folder that contains multiple mutant lines folder, each containing one or more
        mutant specimen folders
    out_dir
        Where to store the intermediate results of the permutation testing.
        Created (including parents) if it does not already exist
    num_perms
        number of permutations to do
    label_info
        if supplied, use it to annotate the results with label names. Also can be used to filter certain labels from
        the analysis using the 'no_analysis' column
    label_map_path
        passed on to ``annotate`` as ``label_map``
    line_fdr
        the FDR threshold at which to accept line level calls
    specimen_fdr
        the FDR threshold at which to accept specimen-level calls
    normalise_to_whole_embryo:
        Whether to divide each organ volume by whole embryo volume
    qc_file
        csv indicating labels from specimens that should be excluded from the analysis
        columns:
        - id: the specimen id
        - line: the line id
        - label: the label to exclude (int)
        - label_name (optional)
    voxel_size
        For calculating organ volumes (passed on to ``make_plots``)
    """
    np.random.seed(999)

    # Root directory for output. This has to exist before the log file and the
    # input data csv are written into it below.
    out_dir.mkdir(exist_ok=True, parents=True)

    init_logging(out_dir / 'stats.log')
    logging.info(common.git_log())
    logging.info(
        f'Running {__name__} with following commands\n{common.command_line_agrs()}'
    )

    # Collate all the staging and organ volume data
    logging.info('Searching for staging data')
    wt_staging = get_staging_data(wt_dir)
    mut_staging = get_staging_data(mut_dir)

    logging.info('searching for organ volume data')
    wt_organ_vol = get_organ_volume_data(wt_dir)
    mut_organ_vol = get_organ_volume_data(mut_dir)

    # data
    # index: spec_id
    # cols: label_nums, with staging and line columns at the end
    data = prepare_data(wt_organ_vol,
                        wt_staging,
                        mut_organ_vol,
                        mut_staging,
                        label_meta=label_info,
                        normalise_to_whole_embryo=normalise_to_whole_embryo,
                        qc_file=qc_file)

    # Keep a record of the input data used in the analysis
    data.to_csv(out_dir / 'input_data.csv')

    # Keep raw data for plotting
    # raw_wt_vols = wt_organ_vol.copy()  # These includes QCd specimens need to remove

    # Make directory to store distributions and thresholds
    dists_out = out_dir / 'distributions'
    dists_out.mkdir(exist_ok=True)

    # Get the null distributions
    line_null, specimen_null = distributions.null(data, num_perms)

    # with open(dists_out / 'null_ids.yaml', 'w') as fh:
    #     yaml.dump(null_ids, fh)

    null_line_pvals_file = dists_out / 'null_line_dist_pvalues.csv'
    null_specimen_pvals_file = dists_out / 'null_specimen_dist_pvalues.csv'

    # Write the null distributions to file
    line_null.to_csv(null_line_pvals_file)
    specimen_null.to_csv(null_specimen_pvals_file)

    # Get the alternative p-value distributions (items 0 and 1) and the t-values (items 2 and 3)
    line_alt, spec_alt, line_alt_t, spec_alt_t = distributions.alternative(data)

    line_alt_pvals_file = dists_out / 'alt_line_dist_pvalues.csv'
    spec_alt_pvals_file = dists_out / 'alt_specimen_dist_pvalues.csv'

    # Write the alternative distributions to file
    line_alt.to_csv(line_alt_pvals_file)
    spec_alt.to_csv(spec_alt_pvals_file)

    # Per-organ p-value thresholds derived from the null and alternative distributions
    line_organ_thresholds = p_thresholds.get_thresholds(line_null, line_alt)
    specimen_organ_thresholds = p_thresholds.get_thresholds(specimen_null, spec_alt)

    line_thresholds_path = dists_out / 'line_organ_p_thresholds.csv'
    spec_thresholds_path = dists_out / 'specimen_organ_p_thresholds.csv'

    line_organ_thresholds.to_csv(line_thresholds_path)
    specimen_organ_thresholds.to_csv(spec_thresholds_path)

    logging.info('Annotating lines')

    lines_root_dir = out_dir / 'lines'
    lines_root_dir.mkdir(exist_ok=True)

    # Annotate lines
    logging.info(f"Annotating lines, using a FDR threshold of {line_fdr}")
    line_hits = annotate(line_organ_thresholds,
                         line_alt,
                         lines_root_dir,
                         label_info=label_info,
                         label_map=label_map_path,
                         write_thresholded_inv_labels=True,
                         fdr_threshold=line_fdr,
                         t_values=line_alt_t,
                         organ_volumes=data)

    line_hits.to_csv(out_dir / 'line_hits.csv')

    # Annotate specimens
    logging.info(
        f"Annotating specimens, using a FDR threshold of {specimen_fdr}")
    spec_hits = annotate(specimen_organ_thresholds,
                         spec_alt,
                         lines_root_dir,
                         is_line_level=False,
                         label_info=label_info,
                         label_map=label_map_path,
                         fdr_threshold=specimen_fdr,
                         t_values=spec_alt_t,
                         organ_volumes=data)

    spec_hits.to_csv(out_dir / 'specimen_level_hits.csv')

    # Make plots
    data_for_plots = data.copy()
    data_for_plots.columns = [x.strip('x') for x in data_for_plots.columns]  # Strip any xs

    # If data has been normalised to WEV revert back for plots
    if normalise_to_whole_embryo:
        for col in data_for_plots.columns:
            # Only the label columns have purely numeric names
            if col.isdigit():
                data_for_plots[col] = data_for_plots[col] * data_for_plots['staging']

    make_plots(mut_dir,
               data_for_plots,
               label_info,
               lines_root_dir,
               voxel_size=voxel_size)

    # Get specimen info. Currently just the WEV z-score to highlight specimens that are too small/large
    spec_info_file = out_dir / 'specimen_info.csv'
    write_specimen_info(wt_staging, mut_staging, spec_info_file)

    # Plots of the null/alternative p-value distributions with their thresholds
    dist_plot_root = out_dir / 'distribution_plots'

    line_plot_dir = dist_plot_root / 'line_level'
    line_plot_dir.mkdir(parents=True, exist_ok=True)
    pvalue_dist_plots(line_null, line_alt, line_organ_thresholds, line_plot_dir)

    specimen_plot_dir = dist_plot_root / 'specimen_level'
    specimen_plot_dir.mkdir(parents=True, exist_ok=True)
    # The 'line' column is dropped from the specimen-level results before plotting
    pvalue_dist_plots(specimen_null,
                      spec_alt.drop(columns=['line']),
                      specimen_organ_thresholds,
                      specimen_plot_dir)

    heatmaps_for_permutation_stats(lines_root_dir)
def run(configfile: Path):
    """
    This is the main function Lama script for generating data from registering volumes
    It reads in the config file, creates directories, and initialises the registration process.

    Looks for paths to inputs relative the directory containing the config file

    Parameters
    ----------
    configfile
        A toml config file

    Returns
    -------
    True once the pipeline has run to completion

    Raises
    ------
    OSError
        If the config file cannot be opened, or if the elastix installation test fails
    LamaConfigError
        If loading the config fails for any other reason
    """
    try:
        config = LamaConfig(configfile)
    except OSError as e:
        logging.error(f'Cannot open LAMA config file: {str(configfile)}\n{e}')
        raise
    except Exception as e:
        raise LamaConfigError(e) from e

    config.mkdir('output_dir')
    qc_dir = config.mkdir('qc_dir')
    config.mkdir('average_folder')
    config.mkdir('root_reg_dir')

    # TODO find the histogram batch code
    # if not config['no_qc']:
    #     input_histogram_dir = config.mkdir('input_image_histograms')
    #     make_histograms(config['inputs'], input_histogram_dir)

    logpath = config.config_path.parent / LOG_FILE  # Make log in same directory as config file

    common.init_logging(logpath)

    if not common.test_installation('elastix'):
        raise OSError('Make sure elastix is installed')

    # Catch ctr-c signals so we can write that to logs
    # signal.signal(signal.SIGTERM, common.service_shutdown)
    signal.signal(signal.SIGINT, common.service_shutdown)

    mem_monitor = MonitorMemory(Path(config['output_dir']).absolute())

    # From here on make sure the memory monitor is stopped, even if a pipeline step raises
    try:
        # Disable QC output?
        no_qc: bool = config['no_qc']

        logging.info(common.git_log())  # If running from a git repo, log the branch and commit

        # logging.info("Registration started")

        first_stage_only = config['skip_forward_registration']
        # If we only want the reverse label propagation we just need the initial rigid registration to act as the
        # Fixed image for the moving population average
        final_registration_dir = run_registration_schedule(config, first_stage_only=first_stage_only)

        if not first_stage_only:
            neg_jac = make_deformations_at_different_scales(config)
            folding_report(neg_jac, config['output_dir'], config['label_info'], outdir=config['output_dir'])

            # NOTE(review): assumed to belong with the full forward registration outputs - confirm nesting
            create_glcms(config, final_registration_dir)

        # Write out the names of the registration dirs in the order they were run
        with open(config['root_reg_dir'] / REG_DIR_ORDER_CFG, 'w') as fh:
            for reg_stage in config['registration_stage_params']:
                fh.write(f'{reg_stage["stage_id"]}\n')
                # Only the first stage is recorded when the forward registration is skipped
                if first_stage_only:
                    break

        if config['skip_transform_inversion']:
            logging.info('Skipping inversion of transforms')
        else:
            logging.info('inverting transforms')

            if config['label_propagation'] == 'reverse_registration':
                reverse_registration(config)
            else:  # invert_transform method is the default
                batch_invert_transform_parameters(config)

            logging.info('propagating volumes')
            invert_volumes(config)

            # Now that labels have been inverted, should we delete the transform files?
            if config['delete_inverted_transforms']:
                shutil.rmtree(config['output_dir'] / 'inverted_transforms')

            # NOTE(review): organ volumes (and the secondary segmentation) are assumed to need the
            # propagated labels, hence they sit inside the inversion branch - confirm
            if config['label_map']:
                generate_organ_volumes(config)

                if config['seg_plugin_dir']:
                    plugin_interface.secondary_segmentation(config)

        if not generate_staging_data(config):
            logging.warning('No staging data generated')

        if not no_qc:
            rev_reg = config['label_propagation'] == 'reverse_registration'
            make_qc_images(config.config_dir,
                           config['fixed_volume'],
                           qc_dir,
                           mask=None,
                           reverse_reg_propagation=rev_reg)
    finally:
        mem_monitor.stop()

    return True