def test_bidscoin(bidsmapfile: Union[Path, dict], options: dict=None, testplugins: bool=True):
    """
    Performs a bidscoin installation test

    :param bidsmapfile: The bidsmap or the full pathname / basename of the bidsmap yaml-file
    :param options:     The bidscoin options. If empty, the default options are used
    :param testplugins: If True, the plugins in the bidscoin plugins folder are tested as well
    :return:            True if the test was successful, False otherwise (None if no bidsmapfile was given)
    """

    if not bidsmapfile:
        return

    LOGGER.info('--------- Testing the BIDScoin tools and settings ---------')

    # Test loading the template bidsmap
    if isinstance(bidsmapfile, (str, Path)):
        try:
            try:                # Include the import in the test + moving the import to the top of this module will cause circular import issues
                from bidscoin import bids
            except ImportError:
                import bids     # This should work if bidscoin was not pip-installed
            bidsmap, _ = bids.load_bidsmap(Path(bidsmapfile))
            if not options:
                options = bidsmap['Options']
            success = True
        except Exception as bidsmaperror:
            LOGGER.error(f'{bidsmaperror}')
            success = False
    else:
        if not options:
            options = bidsmapfile['Options']
        success = True

    # BUGFIX: if loading the bidsmap failed and no options were passed, `options` is still None and
    # the `options['plugins']` lookups below would raise a TypeError instead of reporting a failed test
    if not options:
        LOGGER.error('No bidscoin options available to test')
        return False

    # Show an overview of the bidscoin tools. TODO: test the entry points?
    if not options['plugins']:
        LOGGER.warning('No plugins found in the bidsmap (BIDScoin will likely not do anything)')
    list_executables(True)

    # Test the plugins
    if testplugins:

        # Show an overview of the plugins and show the test results
        list_plugins(True)
        for plugin in (bidscoinfolder/'plugins').glob('*.py'):
            if plugin.stem != '__init__':
                success = test_plugin(plugin.stem, options['plugins'].get(plugin.stem, {})) and success

    return success
def bidstrainer(bidsfolder: str, samplefolder: str, bidsmapfile: str, pattern: str) -> None:
    """
    Main function uses all samples in the samplefolder as training / example data to generate a
    maximally filled-in bidsmap_sample.yaml file.

    :param bidsfolder:   The name of the BIDS root folder
    :param samplefolder: The name of the root directory of the tree containing the sample files / training data. If left empty, bidsfolder/code/bidscoin/samples is used or such an empty directory tree is created
    :param bidsmapfile:  The name of the bidsmap YAML-file
    :param pattern:      The regular expression pattern used in re.match(pattern, dicomfile) to select the dicom files', default='.*\\.(IMA|dcm)$')
    :return:
    """

    bidsfolder  = Path(bidsfolder)
    bidsmapfile = Path(bidsmapfile)

    # Start logging
    bids.setup_logging(bidsfolder/'code'/'bidscoin'/'bidstrainer.log')
    LOGGER.info('------------ START BIDStrainer ------------')

    # Get the heuristics for creating the bidsmap
    heuristics, _ = bids.load_bidsmap(bidsmapfile, bidsfolder/'code'/'bidscoin')

    # Input checking. BUGFIX: test the raw argument *before* converting it to a Path, because
    # Path('') is truthy (it equals Path('.')) and the default-samples branch would never be taken
    if not samplefolder:
        samplefolder = bidsfolder/'code'/'bidscoin'/'samples'
        if not samplefolder.is_dir():
            # Create an empty sample directory tree for the user to fill with training data
            LOGGER.info(f"Creating an empty samples directory tree: {samplefolder}")
            for modality in bids.bidsmodalities + (bids.ignoremodality, bids.unknownmodality):
                for run in heuristics['DICOM'][modality]:
                    if not run['bids']['suffix']:
                        run['bids']['suffix'] = ''
                    (samplefolder/modality/run['bids']['suffix']).mkdir(parents=True, exist_ok=True)
            LOGGER.info('Fill the directory tree with example DICOM files and re-run bidstrainer.py')
            return
    samplefolder = Path(samplefolder)

    # Create a copy / bidsmap skeleton with no modality entries (i.e. bidsmap with empty lists)
    bidsmap = copy.deepcopy(heuristics)
    for logic in ('DICOM', 'PAR', 'P7', 'Nifti', 'FileSystem'):
        for modality in bids.bidsmodalities:
            if bidsmap[logic] and modality in bidsmap[logic]:
                bidsmap[logic][modality] = None

    # Loop over all bidsmodalities and instances and built up the bidsmap entries
    files   = samplefolder.rglob('*')
    samples = [Path(dcmfile) for dcmfile in files if re.match(pattern, str(dcmfile))]
    for sample in samples:
        if not sample.is_file():
            continue
        LOGGER.info(f"Parsing: {sample}")

        # Try to get a dicom mapping
        if bids.is_dicomfile(sample) and heuristics['DICOM']:
            bidsmap = built_dicommap(sample, bidsmap, heuristics)

        # Try to get a PAR/REC mapping
        if bids.is_parfile(sample) and heuristics['PAR']:
            bidsmap = built_parmap(sample, bidsmap, heuristics)

        # Try to get a P7 mapping
        if bids.is_p7file(sample) and heuristics['P7']:
            bidsmap = built_p7map(sample, bidsmap, heuristics)

        # Try to get a nifti mapping
        if bids.is_niftifile(sample) and heuristics['Nifti']:
            bidsmap = built_niftimap(sample, bidsmap, heuristics)

        # Try to get a file-system mapping
        if heuristics['FileSystem']:
            bidsmap = built_filesystemmap(sample, bidsmap, heuristics)

        # Try to get a plugin mapping
        if heuristics['PlugIn']:
            bidsmap = built_pluginmap(sample, bidsmap)

    # Create the bidsmap_sample YAML-file in bidsfolder/code/bidscoin
    (bidsfolder/'code'/'bidscoin').mkdir(parents=True, exist_ok=True)
    bidsmapfile = bidsfolder/'code'/'bidscoin'/'bidsmap_sample.yaml'

    # Save the bidsmap to the bidsmap YAML-file
    bids.save_bidsmap(bidsmapfile, bidsmap)

    LOGGER.info('------------ FINISHED! ------------')
def bidscoiner(rawfolder: str, bidsfolder: str, subjects: list = (), force: bool = False, participants: bool = False, bidsmapfile: str = 'bidsmap.yaml', subprefix: str = 'sub-', sesprefix: str = 'ses-') -> None:
    """
    Main function that processes all the subjects and session in the sourcefolder and uses the
    bidsmap.yaml file in bidsfolder/code/bidscoin to cast the data into the BIDS folder.

    :param rawfolder:    The root folder-name of the sub/ses/data/file tree containing the source data files
    :param bidsfolder:   The name of the BIDS root folder
    :param subjects:     List of selected subjects / participants (i.e. sub-# names / folders) to be processed (the sub- prefix can be removed). Otherwise all subjects in the sourcefolder will be selected
    :param force:        If True, subjects will be processed, regardless of existing folders in the bidsfolder. Otherwise existing folders will be skipped
    :param participants: If True, subjects in particpants.tsv will not be processed (this could be used e.g. to protect these subjects from being reprocessed), also when force=True
    :param bidsmapfile:  The name of the bidsmap YAML-file. If the bidsmap pathname is relative (i.e. no "/" in the name) then it is assumed to be located in bidsfolder/code/bidscoin
    :param subprefix:    The prefix common for all source subject-folders
    :param sesprefix:    The prefix common for all source session-folders
    :return:             Nothing
    """

    # Input checking & defaults
    rawfolder   = Path(rawfolder).resolve()
    bidsfolder  = Path(bidsfolder).resolve()
    bidsmapfile = Path(bidsmapfile)

    # Start logging
    bids.setup_logging(bidsfolder/'code'/'bidscoin'/'bidscoiner.log')
    LOGGER.info('')
    LOGGER.info(f"-------------- START BIDScoiner {bids.version()}: BIDS {bids.bidsversion()} ------------")
    LOGGER.info(f">>> bidscoiner sourcefolder={rawfolder} bidsfolder={bidsfolder} subjects={subjects} force={force}"
                f" participants={participants} bidsmap={bidsmapfile} subprefix={subprefix} sesprefix={sesprefix}")

    # Create a code/bidscoin subfolder
    (bidsfolder/'code'/'bidscoin').mkdir(parents=True, exist_ok=True)

    # Create a dataset description file if it does not exist
    dataset_file = bidsfolder/'dataset_description.json'
    if not dataset_file.is_file():
        dataset_description = {"Name": "REQUIRED. Name of the dataset",
                               "BIDSVersion": str(bids.bidsversion()),
                               "DatasetType": "raw",
                               "License": "RECOMMENDED. The license for the dataset. The use of license name abbreviations is RECOMMENDED for specifying a license. The corresponding full license text MAY be specified in an additional LICENSE file",
                               "Authors": ["OPTIONAL. List of individuals who contributed to the creation/curation of the dataset"],
                               "Acknowledgements": "OPTIONAL. Text acknowledging contributions of individuals or institutions beyond those listed in Authors or Funding",
                               "HowToAcknowledge": "OPTIONAL. Instructions how researchers using this dataset should acknowledge the original authors. This field can also be used to define a publication that should be cited in publications that use the dataset",
                               "Funding": ["OPTIONAL. List of sources of funding (grant numbers)"],
                               "EthicsApprovals": ["OPTIONAL. List of ethics committee approvals of the research protocols and/or protocol identifiers"],
                               "ReferencesAndLinks": ["OPTIONAL. List of references to publication that contain information on the dataset, or links", "https://github.com/Donders-Institute/bidscoin"],
                               "DatasetDOI": "OPTIONAL. The Document Object Identifier of the dataset (not the corresponding paper)"}
        LOGGER.info(f"Creating dataset description file: {dataset_file}")
        with open(dataset_file, 'w') as fid:
            json.dump(dataset_description, fid, indent=4)

    # Create a README file if it does not exist
    readme_file = bidsfolder/'README'
    if not readme_file.is_file():
        LOGGER.info(f"Creating README file: {readme_file}")
        with open(readme_file, 'w') as fid:
            fid.write(f"A free form text ( README ) describing the dataset in more details that SHOULD be provided\n\n"
                      f"The raw BIDS data was created using BIDScoin {bids.version()}\n"
                      f"All provenance information and settings can be found in ./code/bidscoin\n"
                      f"For more information see: https://github.com/Donders-Institute/bidscoin")

    # Get the bidsmap heuristics from the bidsmap YAML-file
    bidsmap, _ = bids.load_bidsmap(bidsmapfile, bidsfolder/'code'/'bidscoin')
    if not bidsmap:
        LOGGER.error(f"No bidsmap file found in {bidsfolder}. Please run the bidsmapper first and / or use the correct bidsfolder")
        return

    # Save options to the .bidsignore file
    bidsignore_items = [item.strip() for item in bidsmap['Options']['bidscoin']['bidsignore'].split(';')]
    LOGGER.info(f"Writing {bidsignore_items} entries to {bidsfolder}.bidsignore")
    with (bidsfolder/'.bidsignore').open('w') as bidsignore:
        for item in bidsignore_items:
            bidsignore.write(item + '\n')

    # Get the table & dictionary of the subjects that have been processed
    participants_tsv  = bidsfolder/'participants.tsv'
    participants_json = participants_tsv.with_suffix('.json')
    if participants_tsv.is_file():
        participants_table = pd.read_csv(participants_tsv, sep='\t')
        participants_table.set_index(['participant_id'], verify_integrity=True, inplace=True)
    else:
        participants_table = pd.DataFrame()
        participants_table.index.name = 'participant_id'
    if participants_json.is_file():
        with participants_json.open('r') as json_fid:
            participants_dict = json.load(json_fid)
    else:
        participants_dict = {'participant_id': {'Description': 'Unique participant identifier'}}

    # Get the list of subjects
    if not subjects:
        subjects = bids.lsdirs(rawfolder, subprefix + '*')
        if not subjects:
            LOGGER.warning(f"No subjects found in: {rawfolder/subprefix}*")
    else:
        subjects = [subprefix + re.sub(f"^{subprefix}", '', subject) for subject in subjects]   # Make sure there is a "sub-" prefix
        subjects = [rawfolder/subject for subject in subjects if (rawfolder/subject).is_dir()]

    # Loop over all subjects and sessions and convert them using the bidsmap entries
    for n, subject in enumerate(subjects, 1):

        LOGGER.info(f"------------------- Subject {n}/{len(subjects)} -------------------")
        if participants and subject.name in list(participants_table.index):
            LOGGER.info(f"Skipping subject: {subject} ({n}/{len(subjects)})")
            continue

        personals = dict()
        sessions  = bids.lsdirs(subject, sesprefix + '*')
        if not sessions:
            sessions = [subject]
        for session in sessions:

            # Unpack the data in a temporary folder if it is tarballed/zipped and/or contains a DICOMDIR file
            session, unpacked = bids.unpack(session, subprefix, sesprefix)

            # See what dataformat we have
            dataformat = bids.get_dataformat(session)
            if not dataformat:
                LOGGER.info(f"Skipping unknown session: {session}")
                continue

            # Check if we should skip the session-folder
            if not force:
                subid, sesid = bids.get_subid_sesid(session/'dum.my', subprefix=subprefix, sesprefix=sesprefix)
                bidssession  = bidsfolder/subid/sesid
                if not bidsmap[dataformat]['session']:
                    bidssession = bidssession.parent
                datatypes = []
                for datatype in bids.lsdirs(bidssession):                                       # See what datatypes we already have in the bids session-folder
                    # BUGFIX: glob() returns an (always truthy) generator, so empty datatype
                    # folders were wrongly counted as containing data; use any() to test content
                    if any(datatype.glob('*')) and bidsmap[dataformat].get(datatype.name):      # See if we are going to add data for this datatype
                        datatypes.append(datatype.name)
                if datatypes:
                    LOGGER.info(f"Skipping processed session: {bidssession} already has {datatypes} data (use the -f option to overrule)")
                    continue

            LOGGER.info(f"Coining session: {session}")

            # Update / append the source data mapping
            if dataformat in ('DICOM', 'PAR'):
                coin_data2bids(dataformat, session, bidsmap, bidsfolder, personals, subprefix, sesprefix)

            # Update / append the P7 mapping
            if dataformat == 'P7':
                LOGGER.error(f"{dataformat} not (yet) supported, skipping session: {session}")
                continue

            # Update / append the nifti mapping
            if dataformat == 'Nifti':
                coin_nifti(session, bidsmap, bidsfolder, personals)

            # Update / append the file-system mapping
            if dataformat == 'FileSystem':
                coin_filesystem(session, bidsmap, bidsfolder, personals)

            # Update / append the plugin mapping
            if bidsmap['PlugIns']:
                coin_plugin(session, bidsmap, bidsfolder, personals)

            # Clean-up the temporary unpacked data
            if unpacked:
                shutil.rmtree(session)

        # Store the collected personals in the participant_table. BUGFIX: this used to be an
        # `assert` inside the loop, which is stripped under `python -O`; check explicitly instead
        if personals and 'participant_id' not in personals:
            LOGGER.error(f"Could not store the personal data of {subject} (no participant_id found): {personals}")
            continue
        for key in personals:
            if key == 'participant_id':     # participant_id is the index of the participants_table
                continue

            # TODO: Check that only values that are consistent over sessions go in the participants.tsv file, otherwise put them in a sessions.tsv file
            if key not in participants_dict:
                participants_dict[key] = dict(LongName='Long (unabbreviated) name of the column',
                                              Description='Description of the column',
                                              Levels=dict(Key='Value (This is for categorical variables: a dictionary of possible values (keys) and their descriptions (values))'),
                                              Units='Measurement units. [<prefix symbol>]<unit symbol> format following the SI standard is RECOMMENDED',
                                              TermURL='URL pointing to a formal definition of this type of data in an ontology available on the web')
            participants_table.loc[personals['participant_id'], key] = personals[key]

    # Write the collected data to the participant files
    LOGGER.info(f"Writing subject data to: {participants_tsv}")
    participants_table.replace('', 'n/a').to_csv(participants_tsv, sep='\t', encoding='utf-8', na_rep='n/a')

    LOGGER.info(f"Writing subject data dictionary to: {participants_json}")
    with participants_json.open('w') as json_fid:
        json.dump(participants_dict, json_fid, indent=4)

    LOGGER.info('-------------- FINISHED! ------------')
    LOGGER.info('')

    bids.reporterrors()
def bidscoiner(rawfolder: str, bidsfolder: str, subjects: list = (), force: bool = False, participants: bool = False, bidsmapfile: str = 'bidsmap.yaml') -> None:
    """
    Main function that processes all the subjects and session in the sourcefolder and uses the
    bidsmap.yaml file in bidsfolder/code/bidscoin to cast the data into the BIDS folder.

    :param rawfolder:    The root folder-name of the sub/ses/data/file tree containing the source data files
    :param bidsfolder:   The name of the BIDS root folder
    :param subjects:     List of selected subjects / participants (i.e. sub-# names / folders) to be processed (the sub- prefix can be removed). Otherwise all subjects in the sourcefolder will be selected
    :param force:        If True, subjects will be processed, regardless of existing folders in the bidsfolder. Otherwise existing folders will be skipped
    :param participants: If True, subjects in particpants.tsv will not be processed (this could be used e.g. to protect these subjects from being reprocessed), also when force=True
    :param bidsmapfile:  The name of the bidsmap YAML-file. If the bidsmap pathname is relative (i.e. no "/" in the name) then it is assumed to be located in bidsfolder/code/bidscoin
    :return:             Nothing
    """

    # Input checking & defaults
    rawfolder   = Path(rawfolder).resolve()
    bidsfolder  = Path(bidsfolder).resolve()
    bidsmapfile = Path(bidsmapfile)

    # Start logging
    bidscoin.setup_logging(bidsfolder/'code'/'bidscoin'/'bidscoiner.log')
    LOGGER.info('')
    LOGGER.info(f"-------------- START BIDScoiner {localversion}: BIDS {bidscoin.bidsversion()} ------------")
    LOGGER.info(f">>> bidscoiner sourcefolder={rawfolder} bidsfolder={bidsfolder} subjects={subjects} force={force} participants={participants} bidsmap={bidsmapfile}")

    # Create a code/bidscoin subfolder
    (bidsfolder/'code'/'bidscoin').mkdir(parents=True, exist_ok=True)

    # Create a dataset description file if it does not exist
    dataset_file = bidsfolder/'dataset_description.json'
    generatedby  = [{"Name": "BIDScoin", "Version": localversion, "CodeURL": "https://github.com/Donders-Institute/bidscoin"}]
    if not dataset_file.is_file():
        LOGGER.info(f"Creating dataset description file: {dataset_file}")
        dataset_description = {"Name": "REQUIRED. Name of the dataset",
                               "GeneratedBy": generatedby,
                               "BIDSVersion": str(bidscoin.bidsversion()),
                               "DatasetType": "raw",
                               "License": "RECOMMENDED. The license for the dataset. The use of license name abbreviations is RECOMMENDED for specifying a license. The corresponding full license text MAY be specified in an additional LICENSE file",
                               "Authors": ["OPTIONAL. List of individuals who contributed to the creation/curation of the dataset"],
                               "Acknowledgements": "OPTIONAL. Text acknowledging contributions of individuals or institutions beyond those listed in Authors or Funding",
                               "HowToAcknowledge": "OPTIONAL. Instructions how researchers using this dataset should acknowledge the original authors. This field can also be used to define a publication that should be cited in publications that use the dataset",
                               "Funding": ["OPTIONAL. List of sources of funding (grant numbers)"],
                               "EthicsApprovals": ["OPTIONAL. List of ethics committee approvals of the research protocols and/or protocol identifiers"],
                               "ReferencesAndLinks": ["OPTIONAL. List of references to publication that contain information on the dataset, or links", "https://github.com/Donders-Institute/bidscoin"],
                               "DatasetDOI": "OPTIONAL. The Document Object Identifier of the dataset (not the corresponding paper)"}
    else:
        # Append BIDScoin to the GeneratedBy provenance list of the existing description file
        with dataset_file.open('r') as fid:
            dataset_description = json.load(fid)
        if 'BIDScoin' not in [generatedby_['Name'] for generatedby_ in dataset_description.get('GeneratedBy', [])]:
            LOGGER.info(f"Adding {generatedby} to {dataset_file}")
            dataset_description['GeneratedBy'] = dataset_description.get('GeneratedBy', []) + generatedby
    with dataset_file.open('w') as fid:
        json.dump(dataset_description, fid, indent=4)

    # Create a README file if it does not exist
    readme_file = bidsfolder/'README'
    if not readme_file.is_file():
        LOGGER.info(f"Creating README file: {readme_file}")
        readme_file.write_text(f"A free form text ( README ) describing the dataset in more details that SHOULD be provided\n\n"
                               f"The raw BIDS data was created using BIDScoin {localversion}\n"
                               f"All provenance information and settings can be found in ./code/bidscoin\n"
                               f"For more information see: https://github.com/Donders-Institute/bidscoin\n")

    # Get the bidsmap heuristics from the bidsmap YAML-file
    bidsmap, _  = bids.load_bidsmap(bidsmapfile, bidsfolder/'code'/'bidscoin')
    dataformats = [dataformat for dataformat in bidsmap if dataformat and dataformat not in ('Options', 'PlugIns')]     # Handle legacy bidsmaps (-> 'PlugIns')
    if not bidsmap:
        LOGGER.error(f"No bidsmap file found in {bidsfolder}. Please run the bidsmapper first and/or use the correct bidsfolder")
        return

    # Load the data conversion plugins
    plugins = [bidscoin.import_plugin(plugin, ('bidscoiner_plugin',)) for plugin, options in bidsmap['Options']['plugins'].items()]
    plugins = [plugin for plugin in plugins if plugin]      # Filter the empty items from the list
    if not plugins:
        LOGGER.warning(f"The plugins listed in your bidsmap['Options'] did not have a usable `bidscoiner_plugin` function, nothing to do")
        LOGGER.info('-------------- FINISHED! ------------')
        LOGGER.info('')
        return

    # Append options to the .bidsignore file
    bidsignore_items = [item.strip() for item in bidsmap['Options']['bidscoin']['bidsignore'].split(';')]
    bidsignore_file  = bidsfolder/'.bidsignore'
    if bidsignore_items:
        LOGGER.info(f"Writing {bidsignore_items} entries to {bidsignore_file}")
        if bidsignore_file.is_file():
            bidsignore_items += bidsignore_file.read_text().splitlines()
        with bidsignore_file.open('w') as bidsignore:
            for item in set(bidsignore_items):
                bidsignore.write(item + '\n')

    # Get the table & dictionary of the subjects that have been processed
    participants_tsv = bidsfolder/'participants.tsv'
    if participants_tsv.is_file():
        participants_table = pd.read_csv(participants_tsv, sep='\t')
        participants_table.set_index(['participant_id'], verify_integrity=True, inplace=True)
    else:
        participants_table = pd.DataFrame()
        participants_table.index.name = 'participant_id'

    # Get the list of subjects
    subprefix = bidsmap['Options']['bidscoin']['subprefix'].replace('*', '')
    sesprefix = bidsmap['Options']['bidscoin']['sesprefix'].replace('*', '')
    if not subjects:
        subjects = bidscoin.lsdirs(rawfolder, (subprefix if subprefix != '*' else '') + '*')
        if not subjects:
            LOGGER.warning(f"No subjects found in: {rawfolder/subprefix}*")
    else:
        subjects = [rawfolder/(subprefix + re.sub(f"^{subprefix}", '', subject)) for subject in subjects]   # Make sure there is a sub-prefix

    # Loop over all subjects and sessions and convert them using the bidsmap entries
    with logging_redirect_tqdm():
        for n, subject in enumerate(tqdm(subjects, unit='subject', leave=False), 1):

            LOGGER.info(f"------------------- Subject {n}/{len(subjects)} -------------------")
            if participants and subject.name in list(participants_table.index):
                LOGGER.info(f"Skipping subject: {subject} ({n}/{len(subjects)})")
                continue
            if not subject.is_dir():
                LOGGER.warning(f"The '{subject}' subject folder does not exist")
                continue

            sessions = bidscoin.lsdirs(subject, (sesprefix if sesprefix != '*' else '') + '*')
            if not sessions or (subject/'DICOMDIR').is_file():
                sessions = [subject]
            for session in sessions:

                # Unpack the data in a temporary folder if it is tarballed/zipped and/or contains a DICOMDIR file
                sesfolders, unpacked = bids.unpack(session)
                for sesfolder in sesfolders:

                    # Check if we should skip the session-folder
                    datasource = bids.get_datasource(sesfolder, bidsmap['Options']['plugins'])
                    if not datasource.dataformat:
                        LOGGER.info(f"No coinable datasources found in '{sesfolder}'")
                        continue
                    subid        = bidsmap[datasource.dataformat]['subject']
                    sesid        = bidsmap[datasource.dataformat]['session']
                    subid, sesid = datasource.subid_sesid(subid, sesid if sesid else '')
                    bidssession  = bidsfolder/subid/sesid       # TODO: Support DICOMDIR with multiple subjects (as in PYDICOMDIR)
                    if not force and bidssession.is_dir():
                        datatypes = []
                        for dataformat in dataformats:
                            for datatype in bidscoin.lsdirs(bidssession):                           # See what datatypes we already have in the bids session-folder
                                # BUGFIX: iterdir() returns an (always truthy) generator, so empty
                                # datatype folders were wrongly counted as containing data; use any()
                                if any(datatype.iterdir()) and bidsmap[dataformat].get(datatype.name):   # See if we are going to add data for this datatype
                                    datatypes.append(datatype.name)
                        if datatypes:
                            LOGGER.info(f"Skipping processed session: {bidssession} already has {datatypes} data (you can carefully use the -f option to overrule)")
                            continue

                    LOGGER.info(f"Coining datasources in: {sesfolder}")
                    if bidssession.is_dir():
                        LOGGER.warning(f"Existing BIDS output-directory found, which may result in duplicate data (with increased run-index). Make sure {bidssession} was cleaned-up from old data before (re)running the bidscoiner")
                    bidssession.mkdir(parents=True, exist_ok=True)

                    # Run the bidscoiner plugins
                    for module in plugins:
                        LOGGER.info(f"Executing plugin: {Path(module.__file__).name}")
                        module.bidscoiner_plugin(sesfolder, bidsmap, bidssession)

                    # Add the special fieldmap metadata (IntendedFor, B0FieldIdentifier, TE, etc)
                    addmetadata(bidssession, subid, sesid)

                    # Clean-up the temporary unpacked data
                    if unpacked:
                        shutil.rmtree(sesfolder)

    # Re-read the participants_table and store the collected personals in the json sidecar-file
    if participants_tsv.is_file():
        participants_table = pd.read_csv(participants_tsv, sep='\t')
        participants_table.set_index(['participant_id'], verify_integrity=True, inplace=True)
    participants_json = participants_tsv.with_suffix('.json')
    participants_dict = {}
    if participants_json.is_file():
        with participants_json.open('r') as json_fid:
            participants_dict = json.load(json_fid)
    if not participants_dict.get('participant_id'):
        participants_dict['participant_id'] = {'Description': 'Unique participant identifier'}
    if not participants_dict.get('session_id') and 'session_id' in participants_table.columns:
        participants_dict['session_id'] = {'Description': 'Session identifier'}
    newkey = False
    for col in participants_table.columns:
        if col not in participants_dict:
            newkey = True
            participants_dict[col] = dict(LongName='Long (unabbreviated) name of the column',
                                          Description='Description of the column',
                                          Levels=dict(Key='Value (This is for categorical variables: a dictionary of possible values (keys) and their descriptions (values))'),
                                          Units='Measurement units. [<prefix symbol>]<unit symbol> format following the SI standard is RECOMMENDED')

    # Write the collected data to the participant files
    if newkey:
        LOGGER.info(f"Writing subject meta data to: {participants_json}")
        with participants_json.open('w') as json_fid:
            json.dump(participants_dict, json_fid, indent=4)

    LOGGER.info('-------------- FINISHED! ------------')
    LOGGER.info('')

    bidscoin.reporterrors()
def bidsmapper(rawfolder: str, bidsfolder: str, bidsmapfile: str, templatefile: str, plugins: list, subprefix: str, sesprefix: str, store: bool=False, noedit: bool=False, force: bool=False) -> None:
    """
    Main function that processes all the subjects and session in the sourcefolder and that generates
    a maximally filled-in bidsmap.yaml file in bidsfolder/code/bidscoin. Folders in sourcefolder are
    assumed to contain a single dataset.

    :param rawfolder:    The root folder-name of the sub/ses/data/file tree containing the source data files
    :param bidsfolder:   The name of the BIDS root folder
    :param bidsmapfile:  The name of the bidsmap YAML-file
    :param templatefile: The name of the bidsmap template YAML-file
    :param plugins:      Optional list of plugins that should be used (overrules the list in the study/template bidsmaps)
    :param subprefix:    The prefix common for all source subject-folders
    :param sesprefix:    The prefix common for all source session-folders
    :param store:        If True, the provenance samples will be stored
    :param noedit:       The bidseditor will not be launched if True
    :param force:        If True, the previous bidsmap and logfiles will be deleted
    :return:
    """

    # Input checking
    rawfolder      = Path(rawfolder).resolve()
    bidsfolder     = Path(bidsfolder).resolve()
    bidsmapfile    = Path(bidsmapfile)
    templatefile   = Path(templatefile)
    bidscoinfolder = bidsfolder/'code'/'bidscoin'

    # Start logging (a forced run starts from a clean logfile)
    if force:
        (bidscoinfolder/'bidsmapper.log').unlink(missing_ok=True)
    bidscoin.setup_logging(bidscoinfolder/'bidsmapper.log')
    LOGGER.info('')
    LOGGER.info('-------------- START BIDSmapper ------------')
    LOGGER.info(f">>> bidsmapper sourcefolder={rawfolder} bidsfolder={bidsfolder} bidsmap={bidsmapfile} "
                f"template={templatefile} plugins={plugins} subprefix={subprefix} sesprefix={sesprefix} store={store} force={force}")

    # Get the heuristics for filling the new bidsmap: the study bidsmap (if any) and the template bidsmap
    bidsmap_old, bidsmapfile = bids.load_bidsmap(bidsmapfile, bidscoinfolder, plugins)
    template, _              = bids.load_bidsmap(templatefile, bidscoinfolder, plugins)

    # Create the new bidsmap as a copy / bidsmap skeleton with no datatype entries (i.e. bidsmap with empty lists)
    if force:
        # Discard any previous study bidsmap and start over from the template
        bidsmapfile.unlink(missing_ok=True)
        bidsmap_old = {}
    if bidsmap_old:
        bidsmap_new = copy.deepcopy(bidsmap_old)
    else:
        bidsmap_new = copy.deepcopy(template)
    template['Options'] = bidsmap_new['Options']            # Always use the options of the new bidsmap
    bidscoindatatypes   = bidsmap_new['Options']['bidscoin'].get('datatypes',[])
    unknowndatatypes    = bidsmap_new['Options']['bidscoin'].get('unknowntypes',[])
    ignoredatatypes     = bidsmap_new['Options']['bidscoin'].get('ignoretypes',[])
    for dataformat in bidsmap_new:
        # Clear out the run-items of every datatype so the plugins can re-discover them
        if dataformat in ('Options','PlugIns'): continue    # Handle legacy bidsmaps (-> 'PlugIns')
        for datatype in bidscoindatatypes + unknowndatatypes + ignoredatatypes:
            if bidsmap_new[dataformat].get(datatype):
                bidsmap_new[dataformat][datatype] = None

    # Store/retrieve the empty or user-defined sub-/ses-prefix
    subprefix, sesprefix = setprefix(bidsmap_new, subprefix, sesprefix)

    # Start with an empty skeleton if we didn't have an old bidsmap
    if not bidsmap_old:
        bidsmap_old = copy.deepcopy(bidsmap_new)
        bidsmapfile = bidscoinfolder/'bidsmap.yaml'

    # Import the data scanning plugins
    plugins = [bidscoin.import_plugin(plugin, ('bidsmapper_plugin',)) for plugin in bidsmap_new['Options']['plugins']]
    plugins = [plugin for plugin in plugins if plugin]      # Filter the empty items from the list
    if not plugins:
        LOGGER.warning(f"The plugins listed in your bidsmap['Options'] did not have a usable `bidsmapper_plugin` function, nothing to do")
        LOGGER.info('-------------- FINISHED! ------------')
        LOGGER.info('')
        return

    # Loop over all subjects and sessions and built up the bidsmap entries
    subjects = bidscoin.lsdirs(rawfolder, (subprefix if subprefix!='*' else '') + '*')
    if not subjects:
        LOGGER.warning(f'No subjects found in: {rawfolder/subprefix}*')
    with logging_redirect_tqdm():
        for n, subject in enumerate(tqdm(subjects, unit='subject', leave=False), 1):
            sessions = bidscoin.lsdirs(subject, (sesprefix if sesprefix!='*' else '') + '*')
            if not sessions or (subject/'DICOMDIR').is_file():
                # No session subfolders (or a DICOMDIR): treat the subject folder itself as the session
                sessions = [subject]
            for session in sessions:
                LOGGER.info(f"Mapping: {session} (subject {n}/{len(subjects)})")

                # Unpack the data in a temporary folder if it is tarballed/zipped and/or contains a DICOMDIR file
                sesfolders, unpacked = bids.unpack(session)
                for sesfolder in sesfolders:
                    # NB: the boolean `store` parameter is repurposed here as the provenance store-dict
                    # that is passed on to the plugins (an empty dict disables storing)
                    if store:
                        store = {'source': sesfolder.parent.parent.parent.parent if unpacked else rawfolder.parent, 'target': bidscoinfolder/'provenance'}
                    else:
                        store = {}

                    # Run the bidsmapper plugins
                    for module in plugins:
                        LOGGER.info(f"Executing plugin: {Path(module.__file__).name} -> {sesfolder}")
                        module.bidsmapper_plugin(sesfolder, bidsmap_new, bidsmap_old, template, store)

                    # Clean-up the temporary unpacked data
                    if unpacked:
                        shutil.rmtree(sesfolder)

    # Save the new study bidsmap in the bidscoinfolder or launch the bidseditor UI_MainWindow
    if noedit:
        bids.save_bidsmap(bidsmapfile, bidsmap_new)
    else:
        LOGGER.info('Opening the bidseditor')
        app = QApplication(sys.argv)
        app.setApplicationName(f"{bidsmapfile} - BIDS editor {localversion}")
        mainwin = bidseditor.MainWindow(bidsfolder, bidsmap_new, template)
        mainwin.show()

        # Pop up an informational message box on top of the editor window
        messagebox = QMessageBox(mainwin)
        messagebox.setText(f"The bidsmapper has finished scanning {rawfolder}\n\n"
                           f"Please carefully check all the different BIDS output names "
                           f"and BIDScoin options and (re)edit them to your needs.\n\n"
                           f"You can always redo this step later by re-running the "
                           f"bidsmapper or by just running the bidseditor tool\n\n"
                           f"{versionmessage}")
        messagebox.setWindowTitle('About the BIDS-mapping workflow')
        messagebox.setIconPixmap(QtGui.QPixmap(str(bidseditor.BIDSCOIN_LOGO)).scaled(150, 150, QtCore.Qt.KeepAspectRatio, QtCore.Qt.SmoothTransformation))
        messagebox.setWindowFlags(messagebox.windowFlags() & ~QtCore.Qt.WindowMinMaxButtonsHint)
        messagebox.show()
        app.exec()      # Blocks until the user closes the bidseditor (which saves the bidsmap itself)

    LOGGER.info('-------------- FINISHED! -------------------')
    LOGGER.info('')

    bidscoin.reporterrors()
def bidsparticipants(rawfolder: str, bidsfolder: str, keys: list, bidsmapfile: str='bidsmap.yaml', dryrun: bool=False) -> None:
    """
    Main function that processes all the subjects and session in the sourcefolder to (re)generate the participants.tsv file in the BIDS folder.

    :param rawfolder:   The root folder-name of the sub/ses/data/file tree containing the source data files
    :param bidsfolder:  The name of the BIDS root folder
    :param keys:        The keys that are extracted from the source data when populating the participants.tsv file
    :param bidsmapfile: The name of the bidsmap YAML-file. If the bidsmap pathname is relative (i.e. no "/" in the name) then it is assumed to be located in bidsfolder/code/bidscoin
    :param dryrun:      Boolean to just display the participants info
    :return:            Nothing
    """

    # Input checking & defaults
    rawfolder  = Path(rawfolder).resolve()
    bidsfolder = Path(bidsfolder).resolve()

    # Start logging (console-only for a dry-run, otherwise also to the bidscoin logfile)
    if dryrun:
        bidscoin.setup_logging()
    else:
        bidscoin.setup_logging(bidsfolder/'code'/'bidscoin'/'bidsparticipants.log')
    LOGGER.info('')
    LOGGER.info(f"-------------- START bidsparticipants {bidscoin.version()} ------------")
    LOGGER.info(f">>> bidsparticipants sourcefolder={rawfolder} bidsfolder={bidsfolder} bidsmap={bidsmapfile}")

    # Get the bidsmap sub-/ses-prefix from the bidsmap YAML-file
    bidsmap,_ = bids.load_bidsmap(Path(bidsmapfile), bidsfolder/'code'/'bidscoin')
    subprefix = bidsmap['Options']['bidscoin']['subprefix']
    sesprefix = bidsmap['Options']['bidscoin']['sesprefix']

    # Get the table & dictionary of the subjects that have been processed
    participants_tsv  = bidsfolder/'participants.tsv'
    participants_json = participants_tsv.with_suffix('.json')
    if participants_tsv.is_file():
        participants_table = pd.read_csv(participants_tsv, sep='\t')
        participants_table.set_index(['participant_id'], verify_integrity=True, inplace=True)
    else:
        participants_table = pd.DataFrame()
        participants_table.index.name = 'participant_id'
    if participants_json.is_file():
        with participants_json.open('r') as json_fid:
            participants_dict = json.load(json_fid)
    else:
        participants_dict = {'participant_id': {'Description': 'Unique participant identifier'}}

    # Get the list of subjects
    subjects = bidscoin.lsdirs(bidsfolder, 'sub-*')
    if not subjects:
        LOGGER.warning(f"No subjects found in: {bidsfolder}")

    # Remove obsolete participants from the participants table
    for participant in participants_table.index:
        if participant not in [sub.name for sub in subjects]:
            participants_table = participants_table.drop(participant)

    # Loop over all subjects in the bids-folder and add them to the participants table
    with logging_redirect_tqdm():
        for n, subject in enumerate(tqdm(subjects, unit='subject', leave=False), 1):

            LOGGER.info(f"------------------- Subject {n}/{len(subjects)} -------------------")
            personals = dict()
            subject   = rawfolder/subject.name.replace('sub-', subprefix.replace('*',''))       # TODO: This assumes e.g. that the subject-ids in the rawfolder did not contain BIDS-invalid characters (such as '_')
            sessions  = bidscoin.lsdirs(subject, (sesprefix if sesprefix!='*' else '') + '*')
            if not subject.is_dir():
                LOGGER.error(f"Could not find source-folder: {subject}")
                continue
            if not sessions:
                sessions = [subject]
            for session in sessions:

                # Only take data from the first session -> BIDS specification
                subid, sesid = bids.DataSource(session/'dum.my', subprefix='sub-', sesprefix='ses-').subid_sesid()
                if sesprefix and sesid and 'session_id' not in personals:
                    personals['session_id']         = sesid
                    participants_dict['session_id'] = {'Description': 'Session identifier'}

                # Unpack the data in a temporary folder if it is tarballed/zipped and/or contains a DICOMDIR file
                sesfolders, unpacked = bids.unpack(session)

                # BUGFIX: initialize before the loop, otherwise `if success:` below raises
                # a NameError when bids.unpack() yields no session folders
                success = False
                for sesfolder in sesfolders:

                    # Update / append the personal source data
                    LOGGER.info(f"Scanning session: {sesfolder}")
                    success = scanpersonals(bidsmap, sesfolder, personals)

                    # Clean-up the temporary unpacked data
                    if unpacked:
                        shutil.rmtree(sesfolder)

                if success: break

            # Store the collected personals in the participant_table. TODO: Check that only values that are consistent over sessions go in the participants.tsv file, otherwise put them in a sessions.tsv file
            for key in keys:
                if key not in participants_dict:
                    participants_dict[key] = dict(LongName    = 'Long (unabbreviated) name of the column',
                                                  Description = 'Description of the the column',
                                                  Levels      = dict(Key='Value (This is for categorical variables: a dictionary of possible values (keys) and their descriptions (values))'),
                                                  Units       = 'Measurement units. [<prefix symbol>]<unit symbol> format following the SI standard is RECOMMENDED')
                participants_table.loc[subid, key] = personals.get(key)

    # Write the collected data to the participant files
    LOGGER.info(f"Writing subject data to: {participants_tsv}")
    if not dryrun:
        participants_table.replace('','n/a').to_csv(participants_tsv, sep='\t', encoding='utf-8', na_rep='n/a')

    LOGGER.info(f"Writing subject data dictionary to: {participants_json}")
    if not dryrun:
        with participants_json.open('w') as json_fid:
            json.dump(participants_dict, json_fid, indent=4)

    print(participants_table)

    LOGGER.info('-------------- FINISHED! ------------')
    LOGGER.info('')

    bidscoin.reporterrors()
def bidsmapper(rawfolder: str, bidsfolder: str, bidsmapfile: str, templatefile: str, subprefix: str = 'sub-', sesprefix: str = 'ses-', interactive: bool = True) -> None:
    """
    Main function that processes all the subjects and session in the sourcefolder and that generates a maximally filled-in bidsmap.yaml file in bidsfolder/code/bidscoin. Folders in sourcefolder are assumed to contain a single dataset.

    :param rawfolder:    The root folder-name of the sub/ses/data/file tree containing the source data files
    :param bidsfolder:   The name of the BIDS root folder
    :param bidsmapfile:  The name of the bidsmap YAML-file
    :param templatefile: The name of the bidsmap template YAML-file
    :param subprefix:    The prefix common for all source subject-folders
    :param sesprefix:    The prefix common for all source session-folders
    :param interactive:  If True, the user will be asked for help if an unknown run is encountered
    :return:             Nothing (the mapped bidsmap is saved to bidsfolder/code/bidscoin/bidsmap.yaml)
    """

    # Input checking
    rawfolder = Path(rawfolder)
    bidsfolder = Path(bidsfolder)
    bidsmapfile = Path(bidsmapfile)
    templatefile = Path(templatefile)

    # Start logging
    bids.setup_logging(bidsfolder / 'code' / 'bidscoin' / 'bidsmapper.log')
    LOGGER.info('')
    LOGGER.info('-------------- START BIDSmapper ------------')
    LOGGER.info(
        f">>> bidsmapper sourcefolder={rawfolder} bidsfolder={bidsfolder} bidsmap={bidsmapfile} "
        f" template={templatefile} subprefix={subprefix} sesprefix={sesprefix} interactive={interactive}"
    )

    # Get the heuristics for filling the new bidsmap
    bidsmap_old, _ = bids.load_bidsmap(bidsmapfile, bidsfolder / 'code' / 'bidscoin')
    template, _ = bids.load_bidsmap(templatefile, bidsfolder / 'code' / 'bidscoin')

    # Create the new bidsmap as a copy / bidsmap skeleton with no modality entries (i.e. bidsmap with empty lists)
    if bidsmap_old:
        bidsmap_new = copy.deepcopy(bidsmap_old)
    else:
        bidsmap_new = copy.deepcopy(template)
    for logic in ('DICOM', 'PAR', 'P7', 'Nifti', 'FileSystem'):
        for modality in bids.bidsmodalities + (bids.unknownmodality, bids.ignoremodality):
            # Clear every modality section so the scan below can rebuild it from scratch
            if bidsmap_new[logic] and modality in bidsmap_new[logic]:
                bidsmap_new[logic][modality] = None

    # Start with an empty skeleton if we didn't have an old bidsmap
    if not bidsmap_old:
        bidsmap_old = copy.deepcopy(bidsmap_new)

    # Start the Qt-application
    # NOTE(review): `gui` starts out as the `interactive` flag and is then rebound to the
    # Ui_MainWindow instance (or to None further below when no subjects are found)
    gui = interactive
    if gui:
        app = QApplication(sys.argv)
        app.setApplicationName('BIDS editor')
        mainwin = bidseditor.MainWindow()
        gui = bidseditor.Ui_MainWindow()
        gui.interactive = interactive
        gui.subprefix = subprefix
        gui.sesprefix = sesprefix
        # interactive == 2 apparently selects a more verbose/guided mode -- TODO confirm
        if gui.interactive == 2:
            QMessageBox.information(
                mainwin, 'BIDS mapping workflow',
                f"The bidsmapper will now scan {bidsfolder} and whenever "
                f"it detects a new type of scan it will ask you to identify it.\n\n"
                f"It is important that you choose the correct BIDS modality "
                f"(e.g. 'anat', 'dwi' or 'func') and suffix (e.g. 'bold' or 'sbref').\n\n"
                f"At the end you will be shown an overview of all the "
                f"different scan types and BIDScoin options (as in the "
                f"bidseditor) that you can then (re)edit to your needs")

    # Loop over all subjects and sessions and built up the bidsmap entries
    subjects = bids.lsdirs(rawfolder, subprefix + '*')
    if not subjects:
        LOGGER.warning(f'No subjects found in: {rawfolder/subprefix}*')
        gui = None                          # Nothing to scan, so skip the editor launch below
    for n, subject in enumerate(subjects, 1):
        sessions = bids.lsdirs(subject, sesprefix + '*')
        if not sessions:
            sessions = [subject]            # Treat a session-less subject folder as a single session
        for session in sessions:
            LOGGER.info(f"Parsing: {session} (subject {n}/{len(subjects)})")
            for runfolder in bids.lsdirs(session):

                # Update / append the dicom mapping
                if bidsmap_old['DICOM']:
                    bidsmap_new = build_dicommap(runfolder, bidsmap_new, bidsmap_old, template, gui)

                # Update / append the PAR/REC mapping
                if bidsmap_old['PAR']:
                    bidsmap_new = build_parmap(runfolder, bidsmap_new, bidsmap_old)

                # Update / append the P7 mapping
                if bidsmap_old['P7']:
                    bidsmap_new = build_p7map(runfolder, bidsmap_new, bidsmap_old)

                # Update / append the nifti mapping
                if bidsmap_old['Nifti']:
                    bidsmap_new = build_niftimap(runfolder, bidsmap_new, bidsmap_old)

                # Update / append the file-system mapping
                if bidsmap_old['FileSystem']:
                    bidsmap_new = build_filesystemmap(runfolder, bidsmap_new, bidsmap_old)

                # Update / append the plugin mapping
                if bidsmap_old['PlugIns']:
                    bidsmap_new = build_pluginmap(runfolder, bidsmap_new, bidsmap_old)

    # Create the bidsmap YAML-file in bidsfolder/code/bidscoin
    bidsmapfile = bidsfolder / 'code' / 'bidscoin' / 'bidsmap.yaml'
    bidsmapfile.parent.mkdir(parents=True, exist_ok=True)

    # Save the bidsmap to the bidsmap YAML-file
    bids.save_bidsmap(bidsmapfile, bidsmap_new)

    # (Re)launch the bidseditor UI_MainWindow
    if gui:
        QMessageBox.information(
            mainwin, 'BIDS mapping workflow',
            f"The bidsmapper has finished scanning {rawfolder}\n\n"
            f"Please carefully check all the different BIDS output names "
            f"and BIDScoin options and (re)edit them to your needs.\n\n"
            f"You can always redo this step later by re-running the "
            f"bidsmapper or by just running the bidseditor tool")
        LOGGER.info('Opening the bidseditor')
        gui.setupUi(mainwin, bidsfolder, rawfolder, bidsmapfile, bidsmap_new, copy.deepcopy(bidsmap_new), template, subprefix=subprefix, sesprefix=sesprefix)
        mainwin.show()
        app.exec()

    LOGGER.info('-------------- FINISHED! -------------------')
    LOGGER.info('')

    bids.reporterrors()
def bidsmapper(rawfolder: str, bidsfolder: str, bidsmapfile: str, templatefile: str, subprefix: str='sub-', sesprefix: str='ses-', store: bool=False, interactive: bool=True) -> None:
    """
    Main function that processes all the subjects and session in the sourcefolder and that generates a maximally filled-in bidsmap.yaml file in bidsfolder/code/bidscoin. Folders in sourcefolder are assumed to contain a single dataset.

    :param rawfolder:    The root folder-name of the sub/ses/data/file tree containing the source data files
    :param bidsfolder:   The name of the BIDS root folder
    :param bidsmapfile:  The name of the bidsmap YAML-file
    :param templatefile: The name of the bidsmap template YAML-file
    :param subprefix:    The prefix common for all source subject-folders
    :param sesprefix:    The prefix common for all source session-folders
    :param store:        If True, the provenance samples will be stored
    :param interactive:  If True, the user will be asked for help if an unknown run is encountered
    :return:             Nothing (the mapped bidsmap is saved or handed to the bidseditor)
    """

    # Input checking
    rawfolder      = Path(rawfolder).resolve()
    bidsfolder     = Path(bidsfolder).resolve()
    bidsmapfile    = Path(bidsmapfile)
    templatefile   = Path(templatefile)
    bidscoinfolder = bidsfolder/'code'/'bidscoin'

    # Start logging
    bids.setup_logging(bidscoinfolder/'bidsmapper.log')
    LOGGER.info('')
    LOGGER.info('-------------- START BIDSmapper ------------')
    LOGGER.info(f">>> bidsmapper sourcefolder={rawfolder} bidsfolder={bidsfolder} bidsmap={bidsmapfile} "
                f" template={templatefile} subprefix={subprefix} sesprefix={sesprefix} store={store} interactive={interactive}")

    # Get the heuristics for filling the new bidsmap
    bidsmap_old, _ = bids.load_bidsmap(bidsmapfile, bidscoinfolder)
    template, _    = bids.load_bidsmap(templatefile, bidscoinfolder)

    # Create the new bidsmap as a copy / bidsmap skeleton with no datatype entries (i.e. bidsmap with empty lists)
    if bidsmap_old:
        bidsmap_new = copy.deepcopy(bidsmap_old)
    else:
        bidsmap_new = copy.deepcopy(template)
    for logic in ('DICOM', 'PAR', 'P7', 'Nifti', 'FileSystem'):
        for datatype in bids.bidsdatatypes + (bids.unknowndatatype, bids.ignoredatatype):
            # Clear every datatype section so the scan below can rebuild it from scratch
            if bidsmap_new.get(logic) and datatype in bidsmap_new[logic]:
                bidsmap_new[logic][datatype] = None

    # Start with an empty skeleton if we didn't have an old bidsmap
    if not bidsmap_old:
        bidsmap_old = copy.deepcopy(bidsmap_new)

    # Start the Qt-application (`gui` is rebound from the interactive flag to the Ui_MainWindow instance)
    gui = interactive
    if gui:
        app = QApplication(sys.argv)
        app.setApplicationName(f"{bidsmapfile} - BIDS editor {bids.version()}")
        mainwin = bidseditor.MainWindow()
        gui = bidseditor.Ui_MainWindow()
        gui.interactive = interactive
        gui.subprefix = subprefix
        gui.sesprefix = sesprefix
        # interactive == 2 apparently selects a more verbose/guided mode -- TODO confirm
        if gui.interactive == 2:
            QMessageBox.information(mainwin, 'BIDS mapping workflow',
                                    f"The bidsmapper will now scan {bidsfolder} and whenever "
                                    f"it detects a new type of scan it will ask you to identify it.\n\n"
                                    f"It is important that you choose the correct BIDS datatype "
                                    f"(e.g. 'anat', 'dwi' or 'func') and suffix (e.g. 'bold' or 'sbref').\n\n"
                                    f"At the end you will be shown an overview of all the "
                                    f"different scan types and BIDScoin options (as in the "
                                    f"bidseditor) that you can then (re)edit to your needs")

    # Loop over all subjects and sessions and built up the bidsmap entries
    dataformat = ''
    subjects   = bids.lsdirs(rawfolder, subprefix + '*')
    if not subjects:
        LOGGER.warning(f'No subjects found in: {rawfolder/subprefix}*')
        gui = None                          # Nothing to scan, so skip the editor launch below
    for n, subject in enumerate(subjects,1):
        sessions = bids.lsdirs(subject, sesprefix + '*')
        if not sessions:
            sessions = [subject]            # Treat a session-less subject folder as a single session
        for session in sessions:

            # Unpack the data in a temporary folder if it is tarballed/zipped and/or contains a DICOMDIR file
            session, unpacked = bids.unpack(session, subprefix, sesprefix)

            # BUGFIX: use a separate local instead of rebinding the boolean `store` parameter.
            # Rebinding it to a dict made the `elif store:` test in later iterations depend on
            # the previous session's dict instead of on the caller's flag
            if unpacked:
                provstore = dict(source=unpacked, target=bidscoinfolder/'provenance')
            elif store:
                provstore = dict(source=rawfolder, target=bidscoinfolder/'provenance')
            else:
                provstore = dict()

            # Loop of the different DICOM runs (series) and collect source files
            sourcefiles = []
            dataformat  = bids.get_dataformat(session)
            if not dataformat:
                LOGGER.info(f"Skipping: {session} (subject {n}/{len(subjects)})")
                continue

            LOGGER.info(f"Parsing: {session} (subject {n}/{len(subjects)})")

            if dataformat=='DICOM':
                for sourcedir in bids.lsdirs(session):
                    sourcefile = bids.get_dicomfile(sourcedir)
                    if sourcefile.name:
                        sourcefiles.append(sourcefile)

            if dataformat=='PAR':
                sourcefiles = bids.get_parfiles(session)

            if dataformat=='P7':
                sourcefiles = bids.get_p7file(session)

            # Update the bidsmap with the info from the source files
            for sourcefile in sourcefiles:
                bidsmap_new = build_bidsmap(dataformat, sourcefile, bidsmap_new, bidsmap_old, template, provstore, gui)

            # Update / append the nifti mapping
            if dataformat=='Nifti':
                bidsmap_new = build_niftimap(session, bidsmap_new, bidsmap_old)

            # Update / append the file-system mapping
            if dataformat=='FileSystem':
                bidsmap_new = build_filesystemmap(session, bidsmap_new, bidsmap_old)

            # Update / append the plugin mapping (use .get() like the logic/datatype guard above,
            # so an old bidsmap without a 'PlugIns' section does not raise a KeyError)
            if bidsmap_old.get('PlugIns'):
                bidsmap_new = build_pluginmap(session, bidsmap_new, bidsmap_old)

            # Clean-up the temporary unpacked data
            if unpacked:
                shutil.rmtree(session)

    if not dataformat:
        LOGGER.warning('Could not determine the dataformat of the source data')

    # (Re)launch the bidseditor UI_MainWindow
    bidsmapfile = bidscoinfolder/'bidsmap.yaml'
    if gui:
        if not dataformat:
            QMessageBox.information(mainwin, 'BIDS mapping workflow',
                                    'Could not determine the dataformat of the source data.\n'
                                    'You can try running the bidseditor tool yourself')
        else:
            QMessageBox.information(mainwin, 'BIDS mapping workflow',
                                    f"The bidsmapper has finished scanning {rawfolder}\n\n"
                                    f"Please carefully check all the different BIDS output names "
                                    f"and BIDScoin options and (re)edit them to your needs.\n\n"
                                    f"You can always redo this step later by re-running the "
                                    f"bidsmapper or by just running the bidseditor tool")
            LOGGER.info('Opening the bidseditor')
            gui.setupUi(mainwin, bidsfolder, bidsmapfile, bidsmap_new, copy.deepcopy(bidsmap_new), template, dataformat, subprefix=subprefix, sesprefix=sesprefix)
            mainwin.show()
            app.exec()
    else:
        # Save the bidsmap in the bidscoinfolder
        bids.save_bidsmap(bidsmapfile, bidsmap_new)

    LOGGER.info('-------------- FINISHED! -------------------')
    LOGGER.info('')

    bids.reporterrors()