def _run_interface(self, runtime):
    import re  # needed for the ignore patterns below
    import bids
    from bids.analysis import auto_model

    models = self.inputs.model
    if not isinstance(models, list):
        database_path = self.inputs.database_path
        layout = bids.BIDSLayout.load(database_path=database_path)

        if not isdefined(models):
            # model is not yet standardized, so validate=False
            # Ignore all subject directories and .git/ and .datalad/ directories
            indexer = bids.BIDSLayoutIndexer(
                ignore=[re.compile(r'sub-'), re.compile(r'\.(git|datalad)')])
            small_layout = bids.BIDSLayout(
                layout.root,
                derivatives=[d.root for d in layout.derivatives.values()],
                validate=False, indexer=indexer)
            # PyBIDS can double up, so find unique models
            models = list(set(small_layout.get(suffix='smdl', return_type='file')))
            if not models:
                raise ValueError("No models found")
        elif models == 'default':
            models = auto_model(layout)

    models = [_ensure_model(m) for m in models]

    if self.inputs.selectors:
        # This is almost certainly incorrect
        models = [model for model in models
                  if all(val in model['Input'].get(key, [val])
                         for key, val in self.inputs.selectors.items())]

    self._results['model_spec'] = models

    return runtime
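# Usage sketch: a minimal, hedged example of the model-discovery step above,
# run standalone. The dataset root '/data/bids' is hypothetical; only the
# pybids calls mirror the interface code.
import re
import bids

indexer = bids.BIDSLayoutIndexer(
    ignore=[re.compile(r'sub-'), re.compile(r'\.(git|datalad)')])
layout = bids.BIDSLayout('/data/bids', validate=False, indexer=indexer)
# Deduplicate, since PyBIDS can return the same model file twice
model_files = sorted(set(layout.get(suffix='smdl', return_type='file')))
print(model_files)  # e.g. ['/data/bids/model-default_smdl.json']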
def run_fitlins(argv=None):
    import re
    from nipype import logging as nlogging

    warnings.showwarning = _warn_redirect
    opts = get_parser().parse_args(argv)

    force_index = [
        # If an entry looks like `/<pattern>/`, treat `<pattern>` as a regex
        re.compile(ign[1:-1]) if (ign[0], ign[-1]) == ('/', '/') else ign
        # Iterate over an empty tuple if undefined
        for ign in opts.force_index or ()
    ]
    ignore = [
        # If an entry looks like `/<pattern>/`, treat `<pattern>` as a regex
        re.compile(ign[1:-1]) if (ign[0], ign[-1]) == ('/', '/') else ign
        # Iterate over an empty tuple if undefined
        for ign in opts.ignore or ()
    ]

    log_level = 25 + 5 * (opts.quiet - opts.verbose)
    logger.setLevel(log_level)
    nlogging.getLogger('nipype.workflow').setLevel(log_level)
    nlogging.getLogger('nipype.interface').setLevel(log_level)
    nlogging.getLogger('nipype.utils').setLevel(log_level)

    if not opts.space:
        # make it an explicit None
        opts.space = None
    if not opts.desc_label:
        # make it an explicit None
        opts.desc_label = None

    ncpus = opts.n_cpus
    if ncpus < 1:
        ncpus = cpu_count()

    plugin_settings = {
        'plugin': 'MultiProc',
        'plugin_args': {
            'n_procs': ncpus,
            'raise_insufficient': False,
            'maxtasksperchild': 1,
        },
    }

    if opts.mem_gb:
        plugin_settings['plugin_args']['memory_gb'] = opts.mem_gb

    model = default_path(opts.model, opts.bids_dir, 'model-default_smdl.json')
    if opts.model in (None, 'default') and not op.exists(model):
        model = 'default'

    derivatives = True if not opts.derivatives else opts.derivatives
    # Need this when specifying args directly (i.e. neuroscout)
    # god bless neuroscout, but let's make it work for others!
    if isinstance(derivatives, list) and len(derivatives) == 1:
        # WRONG AND EVIL to those who have spaces in their paths... bad bad practice
        # TODO - fix neuroscout
        derivatives = derivatives[0].split(" ")

    if opts.estimator != 'afni':
        if opts.error_ts:
            raise NotImplementedError(
                "Saving the error time series is only implemented for"
                " the afni estimator. If this is a feature you want"
                f" for {opts.estimator} please let us know on github.")

    if opts.derivative_label:
        logger.warning('--derivative-label no longer has any effect; '
                       'set output directory name directly')

    os.makedirs(opts.output_dir, exist_ok=True)
    fub.write_derivative_description(opts.bids_dir, opts.output_dir, vars(opts))

    work_dir = mkdtemp() if opts.work_dir is None else opts.work_dir

    # Go ahead and initialize the layout database
    if opts.database_path is None:
        database_path = Path(work_dir) / 'dbcache'
        reset_database = True
    else:
        database_path = opts.database_path
        reset_database = False

    indexer = bids.BIDSLayoutIndexer(ignore=ignore, force_index=force_index)
    layout = bids.BIDSLayout(
        opts.bids_dir,
        derivatives=derivatives,
        database_path=database_path,
        reset_database=reset_database,
        indexer=indexer,
    )

    subject_list = None
    if opts.participant_label is not None:
        subject_list = fub.collect_participants(
            layout, participant_label=opts.participant_label)

    # Build main workflow
    logger.log(25, INIT_MSG(version=__version__, subject_list=subject_list))

    # TODO: Fix AUTO_MODEL
    # if model == 'default':
    #     models = auto_model(layout)
    # else:
    #     import json
    #     if op.exists(model):
    #         model_dict = json.loads(Path(model).read_text())
    #         models = [model_dict]
    model_dict = None
    if model == 'default':
        retcode = 1
        raise NotImplementedError("The default model has not been implemented yet.")
    else:
        import json
        if op.exists(model):
            model_dict = json.loads(Path(model).read_text())

    if not model_dict:
        raise ValueError(f'model_dict cannot be empty. Invalid model filepath {model}.')

    graph = BIDSStatsModelsGraph(layout, model_dict)

    fitlins_wf = init_fitlins_wf(
        database_path,
        opts.output_dir,
        graph=graph,
        analysis_level=opts.analysis_level,
        model=model,
        space=opts.space,
        desc=opts.desc_label,
        participants=subject_list,
        base_dir=work_dir,
        smoothing=opts.smoothing,
        drop_missing=opts.drop_missing,
        drift_model=opts.drift_model,
        estimator=opts.estimator,
        errorts=opts.error_ts,
    )
    fitlins_wf.config = deepcopy(config.get_fitlins_config()._sections)

    if opts.work_dir:
        # dump crashes in the working directory (not /tmp)
        fitlins_wf.config['execution']['crashdump_dir'] = opts.work_dir

    retcode = 0
    if not opts.reports_only:
        try:
            fitlins_wf.run(**plugin_settings)
        except Exception as e:
            logger.critical(f"FitLins failed: {e}")
            raise

    run_context = {
        'version': __version__,
        'command': ' '.join(sys.argv),
        'timestamp': time.strftime('%Y-%m-%d %H:%M:%S %z'),
    }

    selectors = {'desc': opts.desc_label, 'space': opts.space}
    if subject_list is not None:
        selectors['subject'] = subject_list

    graph.load_collections(**selectors)
    report_dict = build_report_dict(opts.output_dir, work_dir, graph)
    write_full_report(report_dict, run_context, opts.output_dir)

    return retcode
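# Usage sketch: run_fitlins() parses an argv-style list, so it can be driven
# programmatically as well as from the console script. All paths below are
# hypothetical and the flag spellings are assumptions -- check get_parser()
# for the authoritative option names.
retcode = run_fitlins([
    '/data/bids',                       # bids_dir
    '/data/bids/derivatives/fitlins',   # output_dir
    'participant',                      # analysis_level
    '-m', '/data/bids/model-default_smdl.json',
    '--participant-label', '01',
    '--n-cpus', '2',
])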
def create_bids_dataframe(self):
    """Generate the dataframe."""

    # Suppress a FutureWarning from pybids about the leading dot included in 'extension' from version 0.14.0
    # The config_bids.json file used matches the future behavior
    # TODO: when reaching version 0.14.0, remove the following line
    pybids.config.set_option('extension_initial_dot', True)

    for path_data in self.paths_data:
        path_data = os.path.join(path_data, '')

        # Initialize BIDSLayoutIndexer and BIDSLayout
        # validate=True by default for both indexer and layout, so the BIDS validator is not skipped
        # Force index for samples tsv and json files, and for subject subfolders containing microscopy files, based on extensions.
        # Force index of subject subfolders containing CT-scan files under "anat" or "ct" folders, based on extensions and modality suffix.
        # TODO: remove force indexing of microscopy files after BEP microscopy is merged in BIDS
        # TODO: remove force indexing of CT-scan files after BEP CT-scan is merged in BIDS
        ext_microscopy = ('.png', '.ome.tif', '.ome.tiff', '.ome.tf2', '.ome.tf8', '.ome.btf')
        ext_ct = ('.nii.gz', '.nii')
        suffix_ct = ('ct', 'CT')
        force_index = []
        for root, dirs, files in os.walk(path_data):
            for file in files:
                # Microscopy
                if file == "samples.tsv" or file == "samples.json":
                    force_index.append(file)
                if (file.endswith(ext_microscopy) and os.path.basename(root) == "microscopy" and
                        root.replace(path_data, '').startswith("sub")):
                    force_index.append(os.path.join(root.replace(path_data, '')))
                # CT-scan
                if (file.endswith(ext_ct) and file.split('.')[0].endswith(suffix_ct) and
                        (os.path.basename(root) == "anat" or os.path.basename(root) == "ct") and
                        root.replace(path_data, '').startswith("sub")):
                    force_index.append(os.path.join(root.replace(path_data, '')))
        indexer = pybids.BIDSLayoutIndexer(force_index=force_index)

        if self.derivatives:
            self.write_derivatives_dataset_description(path_data)

        layout = pybids.BIDSLayout(path_data, config=self.bids_config, indexer=indexer,
                                   derivatives=self.derivatives)

        # Transform layout to dataframe with all entities and json metadata
        # As per pybids, derivatives don't include parsed entities, only the "path" column
        df_next = layout.to_df(metadata=True)

        # Add filename column
        df_next.insert(1, 'filename', df_next['path'].apply(os.path.basename))

        # Drop rows with json, tsv and LICENSE files in case no extensions are provided in the config file for filtering
        df_next = df_next[~df_next['filename'].str.endswith(tuple(['.json', '.tsv', 'LICENSE']))]

        # Update dataframe with subject files of chosen contrasts and extensions,
        # and with derivative files of chosen target_suffix from loader parameters
        df_next = df_next[(~df_next['path'].str.contains('derivatives')
                           & df_next['suffix'].str.contains('|'.join(self.contrast_lst))
                           & df_next['extension'].str.contains('|'.join(self.extensions)))
                          | (df_next['path'].str.contains('derivatives')
                             & df_next['filename'].str.contains('|'.join(self.target_suffix)))]

        if df_next[~df_next['path'].str.contains('derivatives')].empty:
            # Warn if no subject files are found in path_data
            logger.warning("No subject files were found in '{}' dataset. Skipping dataset.".format(path_data))
        else:
            # Add tsv files metadata to dataframe
            df_next = self.add_tsv_metadata(df_next, path_data, layout)

            # TODO: check if other files are needed for EEG and DWI

            # Merge dataframes
            self.df = pd.concat([self.df, df_next], join='outer', ignore_index=True)

    if self.df.empty:
        # Raise an error and exit if no subject files are found in any path data
        raise RuntimeError("No subject files found. Check selection of parameters in config.json"
                           " and datasets compliance with BIDS specification.")

    # Drop duplicated rows based on all columns except 'path'; keep the first occurrence
    columns = self.df.columns.to_list()
    columns.remove('path')
    self.df = self.df[~(self.df.astype(str).duplicated(subset=columns, keep='first'))]

    # If indexing of derivatives is true
    if self.derivatives:
        # Get the list of subject files with available derivatives
        has_deriv, deriv = self.get_subjects_with_derivatives()

        # Filter dataframe to keep subject files with available derivatives only
        if has_deriv:
            self.df = self.df[self.df['filename'].str.contains('|'.join(has_deriv))
                              | self.df['filename'].str.contains('|'.join(deriv))]
        else:
            # Raise an error and exit if no derivatives are found for any subject files
            raise RuntimeError("Derivatives not found.")

    # Reset index
    self.df.reset_index(drop=True, inplace=True)

    # Drop columns with all null values
    self.df.dropna(axis=1, inplace=True, how='all')
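# Usage sketch of the force-index mechanism this method depends on:
# BIDSLayoutIndexer(force_index=...) makes pybids index files and folders
# that the BIDS validator would otherwise skip. The dataset root and
# subject folder below are hypothetical, and 'import bids as pybids' is an
# assumption matching the `pybids.` alias used throughout this module.
import bids as pybids

force_index = ['samples.tsv', 'samples.json', 'sub-01/microscopy']
indexer = pybids.BIDSLayoutIndexer(force_index=force_index)
layout = pybids.BIDSLayout('/data/my_dataset', indexer=indexer)
print(layout.to_df(metadata=True).head())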
def create_bids_dataframe(self):
    """Generate the dataframe."""

    for path_data in self.paths_data:
        path_data = Path(path_data, '')

        # Initialize BIDSLayoutIndexer and BIDSLayout
        # validate=True by default for both indexer and layout, so the BIDS validator is not skipped
        # Force index for samples tsv and json files, and for subject subfolders containing microscopy files, based on extensions.
        # Force index of subject subfolders containing CT-scan files under "anat" or "ct" folders, based on extensions and modality suffix.
        # TODO: remove force indexing of microscopy files after Microscopy-BIDS is integrated in pybids
        # TODO: remove force indexing of CT-scan files after BEP CT-scan is merged in BIDS
        ext_microscopy = ('.png', '.tif', '.ome.tif', '.ome.btf')
        ext_ct = ('.nii.gz', '.nii')
        suffix_ct = ('ct', 'CT')
        force_index = []
        for path_object in path_data.glob('**/*'):
            if path_object.is_file():
                subject_path_index = len(path_data.parts)
                subject_path = path_object.parts[subject_path_index]
                if path_object.name == "samples.tsv" or path_object.name == "samples.json":
                    force_index.append(path_object.name)
                # Microscopy
                if (path_object.name.endswith(ext_microscopy) and path_object.parent.name == "micr" and
                        subject_path.startswith('sub')):
                    force_index.append(str(Path(*path_object.parent.parts[subject_path_index:])))
                # CT-scan
                if (path_object.name.endswith(ext_ct) and path_object.name.split('.')[0].endswith(suffix_ct) and
                        (path_object.parent.name == "anat" or path_object.parent.name == "ct") and
                        subject_path.startswith('sub')):
                    force_index.append(str(Path(*path_object.parent.parts[subject_path_index:])))
        indexer = pybids.BIDSLayoutIndexer(force_index=force_index, validate=self.bids_validate)

        if self.derivatives:
            self.write_derivatives_dataset_description(path_data)

        layout = pybids.BIDSLayout(str(path_data), config=self.bids_config, indexer=indexer,
                                   derivatives=self.derivatives)

        # Transform layout to dataframe with all entities and json metadata
        # As per pybids, derivatives don't include parsed entities, only the "path" column
        df_next = layout.to_df(metadata=True)

        # Add filename column
        df_next.insert(1, 'filename', df_next['path'].apply(os.path.basename))

        # Drop rows with json, tsv and LICENSE files in case no extensions are provided in the config file for filtering
        df_next = df_next[~df_next['filename'].str.endswith(tuple(['.json', '.tsv', 'LICENSE']))]

        # The following command updates the dataframe by doing 2 things:
        # 1. Keep only subject files of chosen contrasts (for files that are not in the 'derivatives' folder)
        #    (ex: '<dataset_path>/sub-XX/anat/sub-XX_T1w.nii.gz' with contrast_lst: ["T1w"])
        # 2. Keep only derivative files of chosen target_suffix (for files that are in the 'derivatives' folder)
        #    (ex: '<dataset_path>/derivatives/labels/sub-XX/anat/sub-XX_T1w_seg-manual.nii.gz' with target_suffix: ["_seg-manual"])
        df_next = df_next[(~df_next['path'].str.contains('derivatives')
                           & df_next['suffix'].str.contains('|'.join(self.contrast_lst)))
                          | (df_next['path'].str.contains('derivatives')
                             & df_next['filename'].str.split('.').apply(lambda x: x[0]).str.endswith(tuple(self.target_suffix)))]

        # Update dataframe with files of chosen extensions
        df_next = df_next[df_next['filename'].str.endswith(tuple(self.extensions))]

        if df_next[~df_next['path'].str.contains('derivatives')].empty:
            # Warn if no subject files are found in path_data
            logger.warning(f"No subject files were found in '{path_data}' dataset. Skipping dataset.")
        else:
            # Add tsv files metadata to dataframe
            df_next = self.add_tsv_metadata(df_next, path_data, layout)

            # TODO: check if other files are needed for EEG and DWI

            # Merge dataframes
            self.df = pd.concat([self.df, df_next], join='outer', ignore_index=True)

    if self.df.empty:
        # Raise an error and exit if no subject files are found in any path data
        raise RuntimeError("No subject files found. Check selection of parameters in config.json"
                           " and datasets compliance with BIDS specification.")

    # Drop duplicated rows based on all columns except 'path'; keep the first occurrence
    columns = self.df.columns.to_list()
    columns.remove('path')
    self.df = self.df[~(self.df.astype(str).duplicated(subset=columns, keep='first'))]

    # Remove subject files without the "split_method" metadata if specified, and keep all derivatives
    if self.split_method:
        files_remove = (self.df[(
            # Path does not contain the 'derivatives' string (i.e. we only target subject raw data files)
            ~self.df['path'].str.contains('derivatives')
            # and the split_method metadata is null (i.e. the subject must have the split_method metadata or will be excluded)
            & self.df[self.split_method].isnull())]
            # Get these filenames and convert to a list.
            ['filename']).tolist()
        if files_remove:
            logger.warning(f"The following files don't have the '{self.split_method}' metadata indicated as the "
                           f"split_method in the configuration JSON file. Skipping these files: {files_remove}")
            # Remove from the dataframe all filenames that contain any of the files in files_remove
            self.df = self.df[~self.df['filename'].str.contains('|'.join(files_remove))]

    # If indexing of derivatives is true
    if self.derivatives:
        # Get the list of subject files with available derivatives
        has_deriv, deriv = self.get_subjects_with_derivatives()

        # Filter dataframe to keep subject files with available derivatives only
        if has_deriv:
            self.df = self.df[self.df['filename'].str.contains('|'.join(has_deriv))
                              | self.df['filename'].str.contains('|'.join(deriv))]
        else:
            # Raise an error and exit if no derivatives are found for any subject files
            raise RuntimeError("Derivatives not found.")

    # Reset index
    self.df.reset_index(drop=True, inplace=True)

    # Drop columns with all null values
    self.df.dropna(axis=1, inplace=True, how='all')
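# Self-contained toy example (synthetic rows, not real layout.to_df() output)
# of the two-way filter above: raw files are kept by contrast suffix,
# derivative files by target_suffix before the first '.'.
import pandas as pd

toy = pd.DataFrame({
    'path': ['/ds/sub-01/anat/sub-01_T1w.nii.gz',
             '/ds/derivatives/labels/sub-01/anat/sub-01_T1w_seg-manual.nii.gz'],
    'suffix': ['T1w', None],  # derivatives have no parsed entities
})
toy['filename'] = toy['path'].str.split('/').str[-1]
contrast_lst, target_suffix = ['T1w'], ['_seg-manual']
kept = toy[(~toy['path'].str.contains('derivatives')
            # na=False keeps the mask boolean where suffix is unparsed
            & toy['suffix'].str.contains('|'.join(contrast_lst), na=False))
           | (toy['path'].str.contains('derivatives')
              & toy['filename'].str.split('.').str[0].str.endswith(tuple(target_suffix)))]
print(kept['filename'].tolist())  # both rows survive the filter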
def create_bids_dataframe(loader_params, derivatives):
    """Create a dataframe containing all BIDS image files in a bids_path and their metadata.

    Args:
        loader_params (dict): Loader parameters, see :doc:`configuration_file` for more details.
        derivatives (bool): If True, derivatives are indexed.

    Returns:
        df (pd.DataFrame): Dataframe containing all indexed BIDS image files and their metadata.
    """

    # Get bids_path, bids_config, target_suffix, extensions and contrast_lst from loader parameters
    bids_path = loader_params['bids_path']
    bids_config = None if 'bids_config' not in loader_params else loader_params['bids_config']
    target_suffix = loader_params['target_suffix']
    # If `target_suffix` is a list of lists, flatten it into a single list
    if any(isinstance(t, list) for t in target_suffix):
        target_suffix = list(itertools.chain.from_iterable(target_suffix))
    extensions = loader_params['extensions']
    contrast_lst = loader_params["contrast_params"]["contrast_lst"]

    # Suppress a FutureWarning from pybids about the leading dot included in 'extension' from version 0.14.0
    # The config_bids.json file used matches the future behavior
    # TODO: when reaching version 0.14.0, remove the following line
    pybids.config.set_option('extension_initial_dot', True)

    # Initialize BIDSLayoutIndexer and BIDSLayout
    # validate=True by default for both indexer and layout, so the BIDS validator is not skipped
    # Force index for samples tsv and json files, and for subject subfolders containing microscopy files, based on extensions.
    # TODO: remove force indexing of microscopy files after BEP microscopy is merged in BIDS
    ext_microscopy = ('.png', '.ome.tif', '.ome.tiff', '.ome.tf2', '.ome.tf8', '.ome.btf')
    force_index = ['samples.tsv', 'samples.json']
    if not bids_path.endswith("/"):
        bids_path = bids_path + "/"
    for root, dirs, files in os.walk(bids_path):
        for file in files:
            if file.endswith(ext_microscopy) and root.replace(bids_path, '').startswith("sub"):
                force_index.append(os.path.join(root.replace(bids_path, '')))
    indexer = pybids.BIDSLayoutIndexer(force_index=force_index)
    layout = pybids.BIDSLayout(bids_path, config=bids_config, indexer=indexer,
                               derivatives=derivatives)

    # Transform layout to dataframe with all entities and json metadata
    # As per pybids, derivatives don't include parsed entities, only the "path" column
    df = layout.to_df(metadata=True)

    # Add filename and parent_path columns
    df['filename'] = df['path'].apply(os.path.basename)
    df['parent_path'] = df['path'].apply(os.path.dirname)

    # Drop rows with json, tsv and LICENSE files in case no extensions are provided in the config file for filtering
    df = df[~df['filename'].str.endswith(tuple(['.json', '.tsv', 'LICENSE']))]

    # Update dataframe with subject files of chosen contrasts and extensions,
    # and with derivative files of chosen target_suffix from loader parameters
    df = df[(~df['path'].str.contains('derivatives')
             & df['suffix'].str.contains('|'.join(contrast_lst))
             & df['extension'].str.contains('|'.join(extensions)))
            | (df['path'].str.contains('derivatives')
               & df['filename'].str.contains('|'.join(target_suffix)))]

    # Add metadata from the participants.tsv file, if present
    # Uses a pybids function
    if layout.get_collections(level='dataset'):
        df_participants = layout.get_collections(level='dataset', merge=True).to_df()
        df_participants.drop(['suffix'], axis=1, inplace=True)
        df = pd.merge(df, df_participants, on='subject', suffixes=("_x", None), how='left')

    # Add metadata from the samples.tsv file, if present
    # TODO: use a pybids function after BEP microscopy is merged in BIDS
    fname_samples = os.path.join(bids_path, "samples.tsv")
    if os.path.exists(fname_samples):
        df_samples = pd.read_csv(fname_samples, sep='\t')
        df['participant_id'] = "sub-" + df['subject']
        df['sample_id'] = "sample-" + df['sample']
        df = pd.merge(df, df_samples, on=['participant_id', 'sample_id'],
                      suffixes=("_x", None), how='left')
        df.drop(['participant_id', 'sample_id'], axis=1, inplace=True)

    # Add metadata from all _sessions.tsv files, if present
    # Uses a pybids function
    if layout.get_collections(level='subject'):
        df_sessions = layout.get_collections(level='subject', merge=True).to_df()
        df_sessions.drop(['suffix'], axis=1, inplace=True)
        df = pd.merge(df, df_sessions, on=['subject', 'session'], suffixes=("_x", None), how='left')

    # Add metadata from all _scans.tsv files, if present
    # TODO: use a pybids function after BEP microscopy is merged in BIDS
    # TODO: verify merge behavior with EEG and DWI scans files; tested with anat and microscopy only
    df_scans = pd.DataFrame()
    for root, dirs, files in os.walk(bids_path):
        for file in files:
            if file.endswith("scans.tsv"):
                df_temp = pd.read_csv(os.path.join(root, file), sep='\t')
                df_scans = pd.concat([df_scans, df_temp], ignore_index=True)
    if not df_scans.empty:
        df_scans['filename'] = df_scans['filename'].apply(os.path.basename)
        df = pd.merge(df, df_scans, on=['filename'], suffixes=("_x", None), how='left')

    # TODO: check if other files are needed for EEG and DWI

    # If indexing of derivatives is true,
    # get the list of subject files with available derivatives
    if derivatives:
        subject_files = df[~df['path'].str.contains('derivatives')]['filename'].to_list()
        prefix_fnames = [s.split('.')[0] for s in subject_files]
        deriv = df[df['path'].str.contains('derivatives')]['filename'].tolist()
        has_deriv = []
        for p in prefix_fnames:
            available = [d for d in deriv if p in d]
            if available:
                has_deriv.append(p)
                for t in target_suffix:
                    if t not in str(available):
                        logger.warning("Missing target_suffix {} for subject {}.".format(t, p))
            else:
                logger.warning("Missing derivatives for subject {}. Skipping subject.".format(p))

        # Filter dataframe to keep subject files with available derivatives only
        if has_deriv:
            df = df[df['filename'].str.contains('|'.join(has_deriv))]
        else:
            # Raise an error and exit if no derivatives are found for any subject files
            raise RuntimeError("Derivatives not found.")

    # Reset index
    df.reset_index(drop=True, inplace=True)

    # Drop columns with all null values
    df.dropna(axis=1, inplace=True, how='all')

    return df