def test_bids_version(self):
    bids_v = bidsversion()
    with open('bidsversion.txt') as fp:
        bids_v_from_file = fp.read()
    self.assertEqual(bids_v, bids_v_from_file)
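# A hedged sketch of the unittest scaffolding assumed around test_bids_version() above;
# the class name and the import location of bidsversion() are guesses, not from the source:
import unittest
from bidscoin.bids import bidsversion   # assumed import path, adjust to the actual module

class TestVersion(unittest.TestCase):    # hypothetical test-case class

    def test_bids_version(self):
        bids_v = bidsversion()
        with open('bidsversion.txt') as fp:   # the version file shipped alongside the package
            self.assertEqual(bids_v, fp.read())

if __name__ == '__main__':
    unittest.main()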
def bidscoiner(rawfolder: str, bidsfolder: str, subjects: tuple=(), force: bool=False,
               participants: bool=False, bidsmapfile: str='code' + os.sep + 'bidsmap.yaml',
               subprefix: str='sub-', sesprefix: str='ses-') -> None:
    """
    Main function that processes all the subjects and sessions in the rawfolder and uses the
    bidsmap.yaml file in bidsfolder/code to cast the data into the BIDS folder.

    :param rawfolder:       The root folder-name of the sub/ses/data/file tree containing the source data files
    :param bidsfolder:      The name of the BIDS root folder
    :param subjects:        List of selected subjects / participants (i.e. sub-# names / folders) to be processed (the sub-prefix can be removed). Otherwise all subjects in the rawfolder will be selected
    :param force:           If True, subjects will be processed, regardless of existing folders in the bidsfolder. Otherwise existing folders will be skipped
    :param participants:    If True, subjects in participants.tsv will not be processed (this could be used e.g. to protect these subjects from being reprocessed), also when force=True
    :param bidsmapfile:     The name of the bidsmap YAML-file. If the bidsmap pathname is relative (i.e. no "/" in the name) then it is assumed to be located in bidsfolder/code/
    :param subprefix:       The prefix common for all source subject-folders
    :param sesprefix:       The prefix common for all source session-folders
    :return:                Nothing
    """

    # Input checking & defaults
    rawfolder  = os.path.abspath(os.path.expanduser(rawfolder))
    bidsfolder = os.path.abspath(os.path.expanduser(bidsfolder))
    logfile    = os.path.join(bidsfolder, 'code', 'bidscoiner.log')
    setup_logging(logfile)

    # Create a code subfolder
    os.makedirs(os.path.join(bidsfolder, 'code'), exist_ok=True)
    if not os.path.isfile(os.path.join(bidsfolder, '.bidsignore')):
        with open(os.path.join(bidsfolder, '.bidsignore'), 'w') as bidsignore:
            bidsignore.write(bids.unknownmodality + os.sep)

    # Start logging
    logger.info(f'------------ START BIDScoiner {bids.version()}: BIDS {bids.bidsversion()} ------------')
    logger.info(f'>>> bidscoiner rawfolder={rawfolder} bidsfolder={bidsfolder} subjects={subjects} force={force}'
                f' participants={participants} bidsmap={bidsmapfile} subprefix={subprefix} sesprefix={sesprefix}')

    # Create a dataset description file if it does not exist
    dataset_file = os.path.join(bidsfolder, 'dataset_description.json')
    if not os.path.isfile(dataset_file):
        dataset_description = {
            "Name":               "REQUIRED. Name of the dataset",
            "BIDSVersion":        bids.bidsversion(),
            "License":            "RECOMMENDED. What license is this dataset distributed under? The use of license name abbreviations is suggested for specifying a license",
            "Authors":            ["OPTIONAL. List of individuals who contributed to the creation/curation of the dataset"],
            "Acknowledgements":   "OPTIONAL. List of individuals who contributed to the creation/curation of the dataset",
            "HowToAcknowledge":   "OPTIONAL. Instructions how researchers using this dataset should acknowledge the original authors. This field can also be used to define a publication that should be cited in publications that use the dataset",
            "Funding":            ["OPTIONAL. List of sources of funding (grant numbers)"],
            "ReferencesAndLinks": ["OPTIONAL. List of references to publications that contain information on the dataset, or links"],
            "DatasetDOI":         "OPTIONAL. The Digital Object Identifier of the dataset (not the corresponding paper)"}
        logger.info('Creating dataset description file: ' + dataset_file)
        with open(dataset_file, 'w') as fid:
            json.dump(dataset_description, fid, indent=4)

    # Create a README file if it does not exist
    readme_file = os.path.join(bidsfolder, 'README')
    if not os.path.isfile(readme_file):
        logger.info('Creating README file: ' + readme_file)
        with open(readme_file, 'w') as fid:
            fid.write('A free form text (README) describing the dataset in more detail that SHOULD be provided')

    # Get the bidsmap heuristics from the bidsmap YAML-file
    bidsmap = bids.get_heuristics(bidsmapfile, os.path.join(bidsfolder, 'code'), logger)

    # Get the table & dictionary of the subjects that have been processed
    participants_tsv  = os.path.join(bidsfolder, 'participants.tsv')
    participants_json = os.path.splitext(participants_tsv)[0] + '.json'
    if os.path.exists(participants_tsv):
        participants_table = pd.read_csv(participants_tsv, sep='\t')
        participants_table.set_index(['participant_id'], verify_integrity=True, inplace=True)
    else:
        participants_table = pd.DataFrame()
        participants_table.index.name = 'participant_id'
    if os.path.exists(participants_json):
        with open(participants_json, 'r') as json_fid:
            participants_dict = json.load(json_fid)
    else:
        participants_dict = dict()

    # Get the list of subjects
    if not subjects:
        subjects = bids.lsdirs(rawfolder, subprefix + '*')
    else:
        # Make sure there is a "sub-" prefix. NB: str.lstrip(subprefix) would strip *characters*,
        # not the prefix itself, so test for the prefix explicitly
        subjects = [subject if subject.startswith(subprefix) else subprefix + subject for subject in subjects]
        subjects = [os.path.join(rawfolder, subject) for subject in subjects if os.path.isdir(os.path.join(rawfolder, subject))]

    # Loop over all subjects and sessions and convert them using the bidsmap entries
    for n, subject in enumerate(subjects, 1):

        if participants and subject in list(participants_table.index):
            logger.info(f'{"-" * 30}')
            logger.info(f'Skipping subject: {subject} ({n}/{len(subjects)})')
            continue

        logger.info(f'{"-" * 30}')
        logger.info(f'Coining subject: {subject} ({n}/{len(subjects)})')

        personals = dict()
        sessions  = bids.lsdirs(subject, sesprefix + '*')
        if not sessions:
            sessions = [subject]
        for session in sessions:

            # Check if we should skip the session-folder
            if not force and os.path.isdir(session.replace(rawfolder, bidsfolder)):
                continue

            # Update / append the dicom mapping
            if bidsmap['DICOM']:
                coin_dicom(session, bidsmap, bidsfolder, personals, subprefix, sesprefix)

            # Update / append the PAR/REC mapping
            if bidsmap['PAR']:
                coin_par(session, bidsmap, bidsfolder, personals)

            # Update / append the P7 mapping
            if bidsmap['P7']:
                coin_p7(session, bidsmap, bidsfolder, personals)

            # Update / append the nifti mapping
            if bidsmap['Nifti']:
                coin_nifti(session, bidsmap, bidsfolder, personals)

            # Update / append the file-system mapping
            if bidsmap['FileSystem']:
                coin_filesystem(session, bidsmap, bidsfolder, personals)

            # Update / append the plugin mapping
            if bidsmap['PlugIn']:
                coin_plugin(session, bidsmap, bidsfolder, personals)

        # Store the collected personals in the participants_table
        for key in personals:

            # participant_id is the index of the participants_table
            assert 'participant_id' in personals
            if key == 'participant_id':
                continue

            # TODO: Check that only values that are consistent over sessions go in the participants.tsv file, otherwise put them in a sessions.tsv file
            if key not in participants_dict:
                participants_dict[key] = dict(LongName    = 'Long (unabbreviated) name of the column',
                                              Description = 'Description of the column',
                                              Levels      = dict(Key='Value (This is for categorical variables: a dictionary of possible values (keys) and their descriptions (values))'),
                                              Units       = 'Measurement units. [<prefix symbol>]<unit symbol> format following the SI standard is RECOMMENDED',
                                              TermURL     = 'URL pointing to a formal definition of this type of data in an ontology available on the web')

            participants_table.loc[personals['participant_id'], key] = personals[key]

    # Write the collected data to the participant files
    logger.info('Writing subject data to: ' + participants_tsv)
    participants_table.to_csv(participants_tsv, sep='\t', encoding='utf-8', na_rep='n/a')

    logger.info('Writing subject data dictionary to: ' + participants_json)
    with open(participants_json, 'w') as json_fid:
        json.dump(participants_dict, json_fid, indent=4)

    logger.info('log file: ' + logfile)
    logger.info('------------ FINISHED! ------------')
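# A minimal usage sketch for the bidscoiner() version above. The folder and subject names
# are made up for illustration; in the BIDScoin repository this function is normally driven
# from a command-line wrapper rather than called directly:
if __name__ == '__main__':
    bidscoiner(rawfolder  = 'raw',                    # hypothetical source-data folder
               bidsfolder = 'bids',                   # hypothetical BIDS output folder
               subjects   = ('sub-001', 'sub-002'),   # or () to process all subjects in rawfolder
               force      = False)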
def bidscoiner(rawfolder: str, bidsfolder: str, subjects: list=(), force: bool=False,
               participants: bool=False, bidsmapfile: str='bidsmap.yaml',
               subprefix: str='sub-', sesprefix: str='ses-') -> None:
    """
    Main function that processes all the subjects and sessions in the sourcefolder and uses the
    bidsmap.yaml file in bidsfolder/code/bidscoin to cast the data into the BIDS folder.

    :param rawfolder:       The root folder-name of the sub/ses/data/file tree containing the source data files
    :param bidsfolder:      The name of the BIDS root folder
    :param subjects:        List of selected subjects / participants (i.e. sub-# names / folders) to be processed (the sub-prefix can be removed). Otherwise all subjects in the sourcefolder will be selected
    :param force:           If True, subjects will be processed, regardless of existing folders in the bidsfolder. Otherwise existing folders will be skipped
    :param participants:    If True, subjects in participants.tsv will not be processed (this could be used e.g. to protect these subjects from being reprocessed), also when force=True
    :param bidsmapfile:     The name of the bidsmap YAML-file. If the bidsmap pathname is relative (i.e. no "/" in the name) then it is assumed to be located in bidsfolder/code/bidscoin
    :param subprefix:       The prefix common for all source subject-folders
    :param sesprefix:       The prefix common for all source session-folders
    :return:                Nothing
    """

    # Input checking & defaults
    rawfolder   = Path(rawfolder).resolve()
    bidsfolder  = Path(bidsfolder).resolve()
    bidsmapfile = Path(bidsmapfile)

    # Start logging
    bids.setup_logging(bidsfolder/'code'/'bidscoin'/'bidscoiner.log')
    LOGGER.info('')
    LOGGER.info(f"-------------- START BIDScoiner {bids.version()}: BIDS {bids.bidsversion()} ------------")
    LOGGER.info(f">>> bidscoiner sourcefolder={rawfolder} bidsfolder={bidsfolder} subjects={subjects} force={force}"
                f" participants={participants} bidsmap={bidsmapfile} subprefix={subprefix} sesprefix={sesprefix}")

    # Create a code/bidscoin subfolder
    (bidsfolder/'code'/'bidscoin').mkdir(parents=True, exist_ok=True)

    # Create a dataset description file if it does not exist
    dataset_file = bidsfolder/'dataset_description.json'
    if not dataset_file.is_file():
        dataset_description = {
            "Name":               "REQUIRED. Name of the dataset",
            "BIDSVersion":        str(bids.bidsversion()),
            "DatasetType":        "raw",
            "License":            "RECOMMENDED. The license for the dataset. The use of license name abbreviations is RECOMMENDED for specifying a license. The corresponding full license text MAY be specified in an additional LICENSE file",
            "Authors":            ["OPTIONAL. List of individuals who contributed to the creation/curation of the dataset"],
            "Acknowledgements":   "OPTIONAL. Text acknowledging contributions of individuals or institutions beyond those listed in Authors or Funding",
            "HowToAcknowledge":   "OPTIONAL. Instructions how researchers using this dataset should acknowledge the original authors. This field can also be used to define a publication that should be cited in publications that use the dataset",
            "Funding":            ["OPTIONAL. List of sources of funding (grant numbers)"],
            "EthicsApprovals":    ["OPTIONAL. List of ethics committee approvals of the research protocols and/or protocol identifiers"],
            "ReferencesAndLinks": ["OPTIONAL. List of references to publications that contain information on the dataset, or links", "https://github.com/Donders-Institute/bidscoin"],
            "DatasetDOI":         "OPTIONAL. The Digital Object Identifier of the dataset (not the corresponding paper)"}
        LOGGER.info(f"Creating dataset description file: {dataset_file}")
        with open(dataset_file, 'w') as fid:
            json.dump(dataset_description, fid, indent=4)

    # Create a README file if it does not exist
    readme_file = bidsfolder/'README'
    if not readme_file.is_file():
        LOGGER.info(f"Creating README file: {readme_file}")
        with open(readme_file, 'w') as fid:
            fid.write(f"A free form text (README) describing the dataset in more detail that SHOULD be provided\n\n"
                      f"The raw BIDS data was created using BIDScoin {bids.version()}\n"
                      f"All provenance information and settings can be found in ./code/bidscoin\n"
                      f"For more information see: https://github.com/Donders-Institute/bidscoin")

    # Get the bidsmap heuristics from the bidsmap YAML-file
    bidsmap, _ = bids.load_bidsmap(bidsmapfile, bidsfolder/'code'/'bidscoin')
    if not bidsmap:
        LOGGER.error(f"No bidsmap file found in {bidsfolder}. Please run the bidsmapper first and/or use the correct bidsfolder")
        return

    # Save options to the .bidsignore file
    bidsignore_items = [item.strip() for item in bidsmap['Options']['bidscoin']['bidsignore'].split(';')]
    LOGGER.info(f"Writing {bidsignore_items} entries to {bidsfolder}/.bidsignore")
    with (bidsfolder/'.bidsignore').open('w') as bidsignore:
        for item in bidsignore_items:
            bidsignore.write(item + '\n')

    # Get the table & dictionary of the subjects that have been processed
    participants_tsv  = bidsfolder/'participants.tsv'
    participants_json = participants_tsv.with_suffix('.json')
    if participants_tsv.is_file():
        participants_table = pd.read_csv(participants_tsv, sep='\t')
        participants_table.set_index(['participant_id'], verify_integrity=True, inplace=True)
    else:
        participants_table = pd.DataFrame()
        participants_table.index.name = 'participant_id'
    if participants_json.is_file():
        with participants_json.open('r') as json_fid:
            participants_dict = json.load(json_fid)
    else:
        participants_dict = {'participant_id': {'Description': 'Unique participant identifier'}}

    # Get the list of subjects
    if not subjects:
        subjects = bids.lsdirs(rawfolder, subprefix + '*')
        if not subjects:
            LOGGER.warning(f"No subjects found in: {rawfolder/subprefix}*")
    else:
        subjects = [subprefix + re.sub(f"^{subprefix}", '', subject) for subject in subjects]   # Make sure there is a "sub-" prefix
        subjects = [rawfolder/subject for subject in subjects if (rawfolder/subject).is_dir()]

    # Loop over all subjects and sessions and convert them using the bidsmap entries
    for n, subject in enumerate(subjects, 1):

        LOGGER.info(f"------------------- Subject {n}/{len(subjects)} -------------------")
        if participants and subject.name in list(participants_table.index):
            LOGGER.info(f"Skipping subject: {subject} ({n}/{len(subjects)})")
            continue

        personals = dict()
        sessions  = bids.lsdirs(subject, sesprefix + '*')
        if not sessions:
            sessions = [subject]
        for session in sessions:

            # Unpack the data in a temporary folder if it is tarballed/zipped and/or contains a DICOMDIR file
            session, unpacked = bids.unpack(session, subprefix, sesprefix)

            # See what dataformat we have
            dataformat = bids.get_dataformat(session)
            if not dataformat:
                LOGGER.info(f"Skipping unknown session: {session}")
                continue

            # Check if we should skip the session-folder
            if not force:
                subid, sesid = bids.get_subid_sesid(session/'dum.my', subprefix=subprefix, sesprefix=sesprefix)
                bidssession  = bidsfolder/subid/sesid
                if not bidsmap[dataformat]['session']:
                    bidssession = bidssession.parent
                datatypes = []
                for datatype in bids.lsdirs(bidssession):                                       # See what datatypes we already have in the bids session-folder
                    # NB: glob() returns a generator, which is always truthy, so peek at the first item
                    if next(datatype.glob('*'), None) and bidsmap[dataformat].get(datatype.name):   # See if we are going to add data for this datatype
                        datatypes.append(datatype.name)
                if datatypes:
                    LOGGER.info(f"Skipping processed session: {bidssession} already has {datatypes} data (use the -f option to overrule)")
                    continue

            LOGGER.info(f"Coining session: {session}")

            # Update / append the source data mapping
            if dataformat in ('DICOM', 'PAR'):
                coin_data2bids(dataformat, session, bidsmap, bidsfolder, personals, subprefix, sesprefix)

            # Update / append the P7 mapping
            if dataformat == 'P7':
                LOGGER.error(f"{dataformat} not (yet) supported, skipping session: {session}")
                continue

            # Update / append the nifti mapping
            if dataformat == 'Nifti':
                coin_nifti(session, bidsmap, bidsfolder, personals)

            # Update / append the file-system mapping
            if dataformat == 'FileSystem':
                coin_filesystem(session, bidsmap, bidsfolder, personals)

            # Update / append the plugin mapping
            if bidsmap['PlugIns']:
                coin_plugin(session, bidsmap, bidsfolder, personals)

            # Clean-up the temporary unpacked data
            if unpacked:
                shutil.rmtree(session)

        # Store the collected personals in the participants_table
        for key in personals:

            # participant_id is the index of the participants_table
            assert 'participant_id' in personals
            if key == 'participant_id':
                continue

            # TODO: Check that only values that are consistent over sessions go in the participants.tsv file, otherwise put them in a sessions.tsv file
            if key not in participants_dict:
                participants_dict[key] = dict(LongName    = 'Long (unabbreviated) name of the column',
                                              Description = 'Description of the column',
                                              Levels      = dict(Key='Value (This is for categorical variables: a dictionary of possible values (keys) and their descriptions (values))'),
                                              Units       = 'Measurement units. [<prefix symbol>]<unit symbol> format following the SI standard is RECOMMENDED',
                                              TermURL     = 'URL pointing to a formal definition of this type of data in an ontology available on the web')

            participants_table.loc[personals['participant_id'], key] = personals[key]

    # Write the collected data to the participant files
    LOGGER.info(f"Writing subject data to: {participants_tsv}")
    participants_table.replace('', 'n/a').to_csv(participants_tsv, sep='\t', encoding='utf-8', na_rep='n/a')

    LOGGER.info(f"Writing subject data dictionary to: {participants_json}")
    with participants_json.open('w') as json_fid:
        json.dump(participants_dict, json_fid, indent=4)

    LOGGER.info('-------------- FINISHED! ------------')
    LOGGER.info('')

    bids.reporterrors()
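# A sketch of a command-line wrapper around the Path-based bidscoiner() above. BIDScoin ships
# its own CLI; this is an illustrative approximation (flag names are assumptions, not the
# project's actual parser):
import argparse

def main():
    parser = argparse.ArgumentParser(description='Converts ("coins") source datasets to a BIDS folder tree')
    parser.add_argument('sourcefolder',               help='The source folder with the sub-#/ses-# subfolders')
    parser.add_argument('bidsfolder',                 help='The destination BIDS root folder')
    parser.add_argument('-p', '--participant_label',  nargs='+', default=(),
                        help='Space-separated list of subjects to process (default: all subjects)')
    parser.add_argument('-f', '--force',              action='store_true',
                        help='Also process sessions that already exist in the bidsfolder')
    args = parser.parse_args()

    bidscoiner(rawfolder  = args.sourcefolder,
               bidsfolder = args.bidsfolder,
               subjects   = args.participant_label,
               force      = args.force)

if __name__ == '__main__':
    main()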
def bidscoiner(rawfolder, bidsfolder, subjects=(), force=False, participants=False,
               bidsmapfile='code' + os.sep + 'bidsmap.yaml'):
    """
    Main function that processes all the subjects and sessions in the rawfolder and uses the
    bidsmap.yaml file in bidsfolder/code to cast the data into the BIDS folder.

    :param str rawfolder:     The root folder-name of the sub/ses/data/file tree containing the source data files
    :param str bidsfolder:    The name of the BIDS root folder
    :param list subjects:     List of selected sub-# names / folders to be processed. Otherwise all subjects in the rawfolder will be selected
    :param bool force:        If True, subjects will be processed, regardless of existing folders in the bidsfolder. Otherwise existing folders will be skipped
    :param bool participants: If True, subjects in participants.tsv will not be processed (this could be used e.g. to protect these subjects from being reprocessed), also when force=True
    :param str bidsmapfile:   The name of the bidsmap YAML-file. If the bidsmap pathname is relative (i.e. no "/" in the name) then it is assumed to be located in bidsfolder/code/
    :return:                  Nothing
    :rtype: NoneType
    """

    # Input checking
    rawfolder  = os.path.abspath(os.path.expanduser(rawfolder))
    bidsfolder = os.path.abspath(os.path.expanduser(bidsfolder))
    os.makedirs(os.path.join(bidsfolder, 'code'), exist_ok=True)
    if not os.path.isfile(os.path.join(bidsfolder, '.bidsignore')):
        with open(os.path.join(bidsfolder, '.bidsignore'), 'w') as bidsignore:
            bidsignore.write(bids.unknownmodality + os.sep)

    # Start logging
    global logfile
    logfile = os.path.join(bidsfolder, 'code', 'bidscoiner.log')
    bids.printlog('------------ START BIDScoiner {ver}: BIDS {bidsver} ------------\n'
                  '>>> bidscoiner rawfolder={arg1} bidsfolder={arg2} subjects={arg3} force={arg4}'
                  ' participants={arg5} bidsmap={arg6}'.format(
                      ver=bids.version(), bidsver=bids.bidsversion(), arg1=rawfolder, arg2=bidsfolder,
                      arg3=subjects, arg4=force, arg5=participants, arg6=bidsmapfile), logfile)

    # Create a dataset description file if it does not exist
    dataset_file = os.path.join(bidsfolder, 'dataset_description.json')
    if not os.path.isfile(dataset_file):
        dataset_description = {
            "Name":               "REQUIRED. Name of the dataset",
            "BIDSVersion":        bids.bidsversion(),
            "License":            "RECOMMENDED. What license is this dataset distributed under? The use of license name abbreviations is suggested for specifying a license",
            "Authors":            ["OPTIONAL. List of individuals who contributed to the creation/curation of the dataset"],
            "Acknowledgements":   "OPTIONAL. List of individuals who contributed to the creation/curation of the dataset",
            "HowToAcknowledge":   "OPTIONAL. Instructions how researchers using this dataset should acknowledge the original authors. This field can also be used to define a publication that should be cited in publications that use the dataset",
            "Funding":            ["OPTIONAL. List of sources of funding (grant numbers)"],
            "ReferencesAndLinks": ["OPTIONAL. List of references to publications that contain information on the dataset, or links"],
            "DatasetDOI":         "OPTIONAL. The Digital Object Identifier of the dataset (not the corresponding paper)"}
        bids.printlog('Creating dataset description file: ' + dataset_file, logfile)
        with open(dataset_file, 'w') as fid:
            json.dump(dataset_description, fid, indent=4)

    # Create a README file if it does not exist
    readme_file = os.path.join(bidsfolder, 'README')
    if not os.path.isfile(readme_file):
        bids.printlog('Creating README file: ' + readme_file, logfile)
        with open(readme_file, 'w') as fid:
            fid.write('A free form text (README) describing the dataset in more detail that SHOULD be provided')

    # Get the bidsmap heuristics from the bidsmap YAML-file
    bidsmap = bids.get_heuristics(bidsmapfile, os.path.join(bidsfolder, 'code'), logfile=logfile)

    # Get the table with subjects that have been processed
    participants_file = os.path.join(bidsfolder, 'participants.tsv')
    if os.path.exists(participants_file):
        participants_table = pd.read_table(participants_file)
    else:
        participants_table = pd.DataFrame(columns=['participant_id'])

    # Get the list of subjects
    if not subjects:
        subjects = bids.lsdirs(rawfolder, 'sub-*')
    else:
        subjects = [os.path.join(rawfolder, subject) for subject in subjects if os.path.isdir(os.path.join(rawfolder, subject))]

    # Loop over all subjects and sessions and convert them using the bidsmap entries
    for subject in subjects:

        if participants and subject in list(participants_table.participant_id):
            continue

        personals = dict()
        sessions  = bids.lsdirs(subject, 'ses-*')
        if not sessions:
            sessions = [subject]   # NB: a bare string here would be iterated character-by-character
        for session in sessions:

            # Check if we should skip the session-folder
            if not force and os.path.isdir(session.replace(rawfolder, bidsfolder)):
                continue

            # Update / append the dicom mapping
            if bidsmap['DICOM']:
                coin_dicom(session, bidsmap, bidsfolder, personals)

            # Update / append the PAR/REC mapping
            if bidsmap['PAR']:
                coin_par(session, bidsmap, bidsfolder, personals)

            # Update / append the P7 mapping
            if bidsmap['P7']:
                coin_p7(session, bidsmap, bidsfolder, personals)

            # Update / append the nifti mapping
            if bidsmap['Nifti']:
                coin_nifti(session, bidsmap, bidsfolder, personals)

            # Update / append the file-system mapping
            if bidsmap['FileSystem']:
                coin_filesystem(session, bidsmap, bidsfolder, personals)

            # Update / append the plugin mapping
            if bidsmap['PlugIn']:
                coin_plugin(session, bidsmap, bidsfolder, personals)

        # Write the collected personals to the participants_file
        if personals:
            for key in personals:
                if key not in participants_table.columns:
                    participants_table[key] = None
            participants_table = participants_table.append(personals, ignore_index=True, verify_integrity=True)

    bids.printlog('Writing subject data to: ' + participants_file, logfile)
    participants_table.to_csv(participants_file, sep='\t', encoding='utf-8', index=False)

    bids.printlog('------------ FINISHED! ------------', logfile)
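# The legacy version above logs through bids.printlog(message, logfile), whose implementation
# is not shown in this section. A plausible minimal stand-in, assuming it simply echoes to
# stdout and appends to the logfile (an assumption, not the project's actual code):
def printlog(message: str, logfile: str = '') -> None:
    """Print a log message to stdout and append it to the logfile."""
    print(message)
    if logfile:
        with open(logfile, 'a') as fid:
            fid.write(message + '\n')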