def bidscoiner(rawfolder: str, bidsfolder: str, subjects: list = (), force: bool = False, participants: bool = False,
               bidsmapfile: str = 'bidsmap.yaml', subprefix: str = 'sub-', sesprefix: str = 'ses-') -> None:
    """
    Main function that processes all the subjects and session in the sourcefolder and uses the
    bidsmap.yaml file in bidsfolder/code/bidscoin to cast the data into the BIDS folder.

    Besides converting the data, a dataset_description.json and a README file are created in the
    bidsfolder if they do not exist yet, the .bidsignore file is (re)written from the bidsmap options
    and the participants.tsv / participants.json files are updated with the collected personal data.

    :param rawfolder:       The root folder-name of the sub/ses/data/file tree containing the source data files
    :param bidsfolder:      The name of the BIDS root folder
    :param subjects:        List of selected subjects / participants (i.e. sub-# names / folders) to be processed
                            (the sub- prefix can be removed). Otherwise all subjects in the sourcefolder will be selected
    :param force:           If True, subjects will be processed, regardless of existing folders in the bidsfolder.
                            Otherwise existing folders will be skipped
    :param participants:    If True, subjects in participants.tsv will not be processed (this could be used e.g. to
                            protect these subjects from being reprocessed), also when force=True
    :param bidsmapfile:     The name of the bidsmap YAML-file. If the bidsmap pathname is relative (i.e. no "/" in
                            the name) then it is assumed to be located in bidsfolder/code/bidscoin
    :param subprefix:       The prefix common for all source subject-folders
    :param sesprefix:       The prefix common for all source session-folders
    :return:                Nothing
    """

    # Input checking & defaults
    rawfolder = Path(rawfolder).resolve()
    bidsfolder = Path(bidsfolder).resolve()
    bidsmapfile = Path(bidsmapfile)
    bidscoinfolder = bidsfolder / 'code' / 'bidscoin'

    # Start logging
    bids.setup_logging(bidscoinfolder / 'bidscoiner.log')
    LOGGER.info('')
    LOGGER.info(f"-------------- START BIDScoiner {bids.version()}: BIDS {bids.bidsversion()} ------------")
    LOGGER.info(f">>> bidscoiner sourcefolder={rawfolder} bidsfolder={bidsfolder} subjects={subjects} force={force}"
                f" participants={participants} bidsmap={bidsmapfile} subprefix={subprefix} sesprefix={sesprefix}")

    # Create a code/bidscoin subfolder
    bidscoinfolder.mkdir(parents=True, exist_ok=True)

    # Create a dataset description file if it does not exist
    dataset_file = bidsfolder / 'dataset_description.json'
    if not dataset_file.is_file():
        dataset_description = {
            "Name": "REQUIRED. Name of the dataset",
            "BIDSVersion": str(bids.bidsversion()),
            "DatasetType": "raw",
            "License": "RECOMMENDED. The license for the dataset. The use of license name abbreviations is RECOMMENDED for specifying a license. The corresponding full license text MAY be specified in an additional LICENSE file",
            "Authors": ["OPTIONAL. List of individuals who contributed to the creation/curation of the dataset"],
            "Acknowledgements": "OPTIONAL. Text acknowledging contributions of individuals or institutions beyond those listed in Authors or Funding",
            "HowToAcknowledge": "OPTIONAL. Instructions how researchers using this dataset should acknowledge the original authors. This field can also be used to define a publication that should be cited in publications that use the dataset",
            "Funding": ["OPTIONAL. List of sources of funding (grant numbers)"],
            "EthicsApprovals": ["OPTIONAL. List of ethics committee approvals of the research protocols and/or protocol identifiers"],
            "ReferencesAndLinks": ["OPTIONAL. List of references to publication that contain information on the dataset, or links",
                                   "https://github.com/Donders-Institute/bidscoin"],
            "DatasetDOI": "OPTIONAL. The Document Object Identifier of the dataset (not the corresponding paper)",
        }
        LOGGER.info(f"Creating dataset description file: {dataset_file}")
        with open(dataset_file, 'w') as fid:
            json.dump(dataset_description, fid, indent=4)

    # Create a README file if it does not exist
    readme_file = bidsfolder / 'README'
    if not readme_file.is_file():
        LOGGER.info(f"Creating README file: {readme_file}")
        with open(readme_file, 'w') as fid:
            fid.write(f"A free form text ( README ) describing the dataset in more details that SHOULD be provided\n\n"
                      f"The raw BIDS data was created using BIDScoin {bids.version()}\n"
                      f"All provenance information and settings can be found in ./code/bidscoin\n"
                      f"For more information see: https://github.com/Donders-Institute/bidscoin")

    # Get the bidsmap heuristics from the bidsmap YAML-file
    bidsmap, _ = bids.load_bidsmap(bidsmapfile, bidscoinfolder)
    if not bidsmap:
        LOGGER.error(f"No bidsmap file found in {bidsfolder}. Please run the bidsmapper first and / or use the correct bidsfolder")
        return

    # Save options to the .bidsignore file
    bidsignore_file = bidsfolder / '.bidsignore'
    bidsignore_items = [item.strip() for item in bidsmap['Options']['bidscoin']['bidsignore'].split(';')]
    LOGGER.info(f"Writing {bidsignore_items} entries to {bidsignore_file}")
    with bidsignore_file.open('w') as bidsignore:
        for item in bidsignore_items:
            bidsignore.write(item + '\n')

    # Get the table & dictionary of the subjects that have been processed
    participants_tsv = bidsfolder / 'participants.tsv'
    participants_json = participants_tsv.with_suffix('.json')
    if participants_tsv.is_file():
        participants_table = pd.read_csv(participants_tsv, sep='\t')
        participants_table.set_index(['participant_id'], verify_integrity=True, inplace=True)
    else:
        participants_table = pd.DataFrame()
        participants_table.index.name = 'participant_id'
    if participants_json.is_file():
        with participants_json.open('r') as json_fid:
            participants_dict = json.load(json_fid)
    else:
        participants_dict = {'participant_id': {'Description': 'Unique participant identifier'}}

    # Get the list of subjects
    if not subjects:
        subjects = bids.lsdirs(rawfolder, subprefix + '*')
        if not subjects:
            LOGGER.warning(f"No subjects found in: {rawfolder/subprefix}*")
    else:
        # Make sure there is a "sub-" prefix. A plain string test is used (instead of a regular
        # expression) so that prefixes containing regex meta-characters are handled literally
        subjects = [subject if subject.startswith(subprefix) else subprefix + subject for subject in subjects]
        subjects = [rawfolder / subject for subject in subjects if (rawfolder / subject).is_dir()]

    # Loop over all subjects and sessions and convert them using the bidsmap entries
    for n, subject in enumerate(subjects, 1):

        LOGGER.info(f"------------------- Subject {n}/{len(subjects)} -------------------")
        if participants and subject.name in participants_table.index:
            LOGGER.info(f"Skipping subject: {subject} ({n}/{len(subjects)})")
            continue

        personals = dict()
        sessions = bids.lsdirs(subject, sesprefix + '*')
        if not sessions:
            sessions = [subject]
        for session in sessions:

            # Unpack the data in a temporary folder if it is tarballed/zipped and/or contains a DICOMDIR file
            session, unpacked = bids.unpack(session, subprefix, sesprefix)

            # The try/finally guarantees that the temporary unpacked data is removed, also when the
            # session is skipped (continue) or when the conversion raises an exception
            try:

                # See what dataformat we have
                dataformat = bids.get_dataformat(session)
                if not dataformat:
                    LOGGER.info(f"Skipping unknown session: {session}")
                    continue

                # Check if we should skip the session-folder
                if not force:
                    subid, sesid = bids.get_subid_sesid(session / 'dum.my', subprefix=subprefix, sesprefix=sesprefix)
                    bidssession = bidsfolder / subid / sesid
                    if not bidsmap[dataformat]['session']:
                        bidssession = bidssession.parent

                    # See what (non-empty) datatypes we already have in the bids session-folder and if we are
                    # going to add data for them. NB: glob() returns a generator, which is always truthy,
                    # hence any() is needed to test whether the datatype folder actually has content
                    datatypes = [datatype.name for datatype in bids.lsdirs(bidssession)
                                 if any(datatype.glob('*')) and bidsmap[dataformat].get(datatype.name)]
                    if datatypes:
                        LOGGER.info(f"Skipping processed session: {bidssession} already has {datatypes} data (use the -f option to overrule)")
                        continue

                LOGGER.info(f"Coining session: {session}")

                # Update / append the source data mapping
                if dataformat in ('DICOM', 'PAR'):
                    coin_data2bids(dataformat, session, bidsmap, bidsfolder, personals, subprefix, sesprefix)

                # Update / append the P7 mapping
                if dataformat == 'P7':
                    LOGGER.error(f"{dataformat} not (yet) supported, skipping session: {session}")
                    continue

                # Update / append the nifti mapping
                if dataformat == 'Nifti':
                    coin_nifti(session, bidsmap, bidsfolder, personals)

                # Update / append the file-system mapping
                if dataformat == 'FileSystem':
                    coin_filesystem(session, bidsmap, bidsfolder, personals)

                # Update / append the plugin mapping
                if bidsmap['PlugIns']:
                    coin_plugin(session, bidsmap, bidsfolder, personals)

            finally:

                # Clean-up the temporary unpacked data
                if unpacked:
                    shutil.rmtree(session)

        # Store the collected personals in the participant_table
        if personals:
            # participant_id is the index of the participants_table
            assert 'participant_id' in personals
        for key in personals:

            if key == 'participant_id':
                continue

            # TODO: Check that only values that are consistent over sessions go in the participants.tsv file, otherwise put them in a sessions.tsv file

            if key not in participants_dict:
                participants_dict[key] = dict(LongName='Long (unabbreviated) name of the column',
                                              Description='Description of the the column',
                                              Levels=dict(Key='Value (This is for categorical variables: a dictionary of possible values (keys) and their descriptions (values))'),
                                              Units='Measurement units. [<prefix symbol>]<unit symbol> format following the SI standard is RECOMMENDED',
                                              TermURL='URL pointing to a formal definition of this type of data in an ontology available on the web')
            participants_table.loc[personals['participant_id'], key] = personals[key]

    # Write the collected data to the participant files
    LOGGER.info(f"Writing subject data to: {participants_tsv}")
    participants_table.replace('', 'n/a').to_csv(participants_tsv, sep='\t', encoding='utf-8', na_rep='n/a')

    LOGGER.info(f"Writing subject data dictionary to: {participants_json}")
    with participants_json.open('w') as json_fid:
        json.dump(participants_dict, json_fid, indent=4)

    LOGGER.info('-------------- FINISHED! ------------')
    LOGGER.info('')

    bids.reporterrors()
def bidsmapper(rawfolder: str, bidsfolder: str, bidsmapfile: str, templatefile: str, subprefix: str = 'sub-',
               sesprefix: str = 'ses-', store: bool = False, interactive: bool = True) -> None:
    """
    Main function that processes all the subjects and session in the sourcefolder and that generates a
    maximally filled-in bidsmap.yaml file in bidsfolder/code/bidscoin. Folders in sourcefolder are
    assumed to contain a single dataset.

    In interactive mode the resulting bidsmap is handed over to the bidseditor GUI, otherwise it is
    saved directly as bidsfolder/code/bidscoin/bidsmap.yaml.

    :param rawfolder:       The root folder-name of the sub/ses/data/file tree containing the source data files
    :param bidsfolder:      The name of the BIDS root folder
    :param bidsmapfile:     The name of the bidsmap YAML-file
    :param templatefile:    The name of the bidsmap template YAML-file
    :param subprefix:       The prefix common for all source subject-folders
    :param sesprefix:       The prefix common for all source session-folders
    :param store:           If True, the provenance samples will be stored
    :param interactive:     If True, the user will be asked for help if an unknown run is encountered
                            (a value of 2 additionally shows an introductory workflow message)
    :return:                Nothing
    """

    # Input checking
    rawfolder = Path(rawfolder).resolve()
    bidsfolder = Path(bidsfolder).resolve()
    bidsmapfile = Path(bidsmapfile)
    templatefile = Path(templatefile)
    bidscoinfolder = bidsfolder / 'code' / 'bidscoin'

    # Start logging
    bids.setup_logging(bidscoinfolder / 'bidsmapper.log')
    LOGGER.info('')
    LOGGER.info('-------------- START BIDSmapper ------------')
    LOGGER.info(f">>> bidsmapper sourcefolder={rawfolder} bidsfolder={bidsfolder} bidsmap={bidsmapfile} "
                f" template={templatefile} subprefix={subprefix} sesprefix={sesprefix} store={store} interactive={interactive}")

    # Get the heuristics for filling the new bidsmap
    bidsmap_old, _ = bids.load_bidsmap(bidsmapfile, bidscoinfolder)
    template, _ = bids.load_bidsmap(templatefile, bidscoinfolder)

    # Create the new bidsmap as a copy / bidsmap skeleton with no modality entries (i.e. bidsmap with empty lists)
    if bidsmap_old:
        bidsmap_new = copy.deepcopy(bidsmap_old)
    else:
        bidsmap_new = copy.deepcopy(template)
    for logic in ('DICOM', 'PAR', 'P7', 'Nifti', 'FileSystem'):
        for modality in bids.bidsmodalities + (bids.unknownmodality, bids.ignoremodality):
            if bidsmap_new[logic] and modality in bidsmap_new[logic]:
                bidsmap_new[logic][modality] = None

    # Start with an empty skeleton if we didn't have an old bidsmap
    if not bidsmap_old:
        bidsmap_old = copy.deepcopy(bidsmap_new)

    # Start the Qt-application
    gui = interactive
    if gui:
        app = QApplication(sys.argv)
        app.setApplicationName('BIDS editor')
        mainwin = bidseditor.MainWindow()
        gui = bidseditor.Ui_MainWindow()
        gui.interactive = interactive
        gui.subprefix = subprefix
        gui.sesprefix = sesprefix

        if gui.interactive == 2:
            QMessageBox.information(mainwin, 'BIDS mapping workflow',
                                    f"The bidsmapper will now scan {rawfolder} and whenever "
                                    f"it detects a new type of scan it will ask you to identify it.\n\n"
                                    f"It is important that you choose the correct BIDS modality "
                                    f"(e.g. 'anat', 'dwi' or 'func') and suffix (e.g. 'bold' or 'sbref').\n\n"
                                    f"At the end you will be shown an overview of all the "
                                    f"different scan types and BIDScoin options (as in the "
                                    f"bidseditor) that you can then (re)edit to your needs")

    # Loop over all subjects and sessions and built up the bidsmap entries
    dataformat = ''             # The dataformat of the most recent session that could be recognised
    subjects = bids.lsdirs(rawfolder, subprefix + '*')
    if not subjects:
        LOGGER.warning(f'No subjects found in: {rawfolder/subprefix}*')
        gui = None
    for n, subject in enumerate(subjects, 1):

        sessions = bids.lsdirs(subject, sesprefix + '*')
        if not sessions:
            sessions = [subject]
        for session in sessions:

            # Unpack the data in a temporary folder if it is tarballed/zipped and/or contains a DICOMDIR file
            session, unpacked = bids.unpack(session, subprefix, sesprefix, '*')

            # The try/finally guarantees that the temporary unpacked data is removed, also when the
            # session is skipped (continue) or when building the bidsmap raises an exception
            try:

                # Determine the provenance store for this session. A separate variable is used so that the
                # `store` argument is not overwritten and keeps its meaning for the next sessions
                # NOTE(review): `unpacked` is used as the provenance source here -- confirm what bids.unpack returns
                if unpacked:
                    session_store = dict(source=unpacked, target=bidscoinfolder / 'provenance')
                elif store:
                    session_store = dict(source=rawfolder, target=bidscoinfolder / 'provenance')
                else:
                    session_store = dict()

                # Loop of the different DICOM runs (series) and collect source files
                sourcefiles = []
                session_format = bids.get_dataformat(session)
                if not session_format:
                    LOGGER.info(f"Skipping: {session} (subject {n}/{len(subjects)})")
                    continue

                # Only remember recognised formats, such that a single unknown session does not
                # discard the dataformat that was found in the other sessions
                dataformat = session_format

                LOGGER.info(f"Parsing: {session} (subject {n}/{len(subjects)})")
                if dataformat == 'DICOM':
                    for sourcedir in bids.lsdirs(session):
                        sourcefile = bids.get_dicomfile(sourcedir)
                        if sourcefile.name:
                            sourcefiles.append(sourcefile)

                if dataformat == 'PAR':
                    sourcefiles = bids.get_parfiles(session)

                if dataformat == 'P7':
                    sourcefiles = bids.get_p7file(session)

                # Update the bidsmap with the info from the source files
                for sourcefile in sourcefiles:
                    bidsmap_new = build_bidsmap(dataformat, sourcefile, bidsmap_new, bidsmap_old, template, session_store, gui)

                # Update / append the nifti mapping
                if dataformat == 'Nifti':
                    bidsmap_new = build_niftimap(session, bidsmap_new, bidsmap_old)

                # Update / append the file-system mapping
                if dataformat == 'FileSystem':
                    bidsmap_new = build_filesystemmap(session, bidsmap_new, bidsmap_old)

                # Update / append the plugin mapping
                if bidsmap_old['PlugIns']:
                    bidsmap_new = build_pluginmap(session, bidsmap_new, bidsmap_old)

            finally:

                # Clean-up the temporary unpacked data
                if unpacked:
                    shutil.rmtree(session)

    if not dataformat:
        LOGGER.warning('Could not determine the dataformat of the source data')

    # (Re)launch the bidseditor UI_MainWindow
    bidsmapfile = bidscoinfolder / 'bidsmap.yaml'
    if gui:
        if not dataformat:
            QMessageBox.information(mainwin, 'BIDS mapping workflow',
                                    'Could not determine the dataformat of the source data.\n'
                                    'You can try running the bidseditor tool yourself')
        else:
            QMessageBox.information(mainwin, 'BIDS mapping workflow',
                                    f"The bidsmapper has finished scanning {rawfolder}\n\n"
                                    f"Please carefully check all the different BIDS output names "
                                    f"and BIDScoin options and (re)edit them to your needs.\n\n"
                                    f"You can always redo this step later by re-running the "
                                    f"bidsmapper or by just running the bidseditor tool")

            LOGGER.info('Opening the bidseditor')
            gui.setupUi(mainwin, bidsfolder, bidsmapfile, bidsmap_new, copy.deepcopy(bidsmap_new), template, dataformat,
                        subprefix=subprefix, sesprefix=sesprefix)
            mainwin.show()
            app.exec()

    else:
        # Save the bidsmap in the bidscoinfolder
        bids.save_bidsmap(bidsmapfile, bidsmap_new)

    LOGGER.info('-------------- FINISHED! -------------------')
    LOGGER.info('')

    bids.reporterrors()