def bidscoiner_plugin(session: Path, bidsmap: dict, bidsses: Path) -> None:
    """
    The bidscoiner plugin to convert the session DICOM and PAR/REC source-files into BIDS-valid nifti-files in the
    corresponding bids session-folder and extract personals (e.g. Age, Sex) from the source header

    For every data source (DICOM series folder or PAR file) in the session a matching run is looked up in the
    bidsmap, the source is converted (physio logs via `physio`, everything else via the configured dcm2niix
    command), the dcm2niix postfixes are translated into BIDS entities / acq-labels, the json sidecar-files are
    enriched with the source- and run meta-data, and the acquisition time is stored in the session scans.tsv file.
    Finally the personal data of the subject is added to the participants.tsv file in the BIDS root folder.

    :param session:     The full-path name of the subject/session source folder
    :param bidsmap:     The full mapping heuristics from the bidsmap YAML-file
    :param bidsses:     The full-path name of the BIDS output `sub-/ses-` folder
    :return:            Nothing
    """

    # Get the subject identifiers and the BIDS root folder from the bidsses folder
    if bidsses.name.startswith('ses-'):
        bidsfolder = bidsses.parent.parent
        subid      = bidsses.parent.name
        sesid      = bidsses.name
    else:
        bidsfolder = bidsses.parent
        subid      = bidsses.name
        sesid      = ''

    # Get started and see what dataformat we have
    options    = bidsmap['Options']['plugins']['dcm2niix2bids']
    datasource = bids.get_datasource(session, {'dcm2niix2bids': options})
    dataformat = datasource.dataformat
    if not dataformat:
        LOGGER.info(f"No {__name__} sourcedata found in: {session}")
        return

    # Make a list of all the data sources / runs
    manufacturer = 'UNKNOWN'
    sources      = []
    if dataformat == 'DICOM':
        sources      = bidscoin.lsdirs(session)
        manufacturer = datasource.attributes('Manufacturer')
    elif dataformat == 'PAR':
        sources      = bids.get_parfiles(session)
        manufacturer = 'Philips Medical Systems'
    else:
        # NB: Not `LOGGER.exception`, there is no active exception here (it would log a bogus "NoneType: None" traceback)
        LOGGER.error(f"Unsupported dataformat '{dataformat}'")

    # Read or create a scans_table and tsv-file
    scans_tsv = bidsses / f"{subid}{bids.add_prefix('_',sesid)}_scans.tsv"
    if scans_tsv.is_file():
        scans_table = pd.read_csv(scans_tsv, sep='\t', index_col='filename')
    else:
        scans_table = pd.DataFrame(columns=['acq_time'], dtype='str')
        scans_table.index.name = 'filename'

    # Process all the source files or run subfolders
    sourcefile = Path()
    for source in sources:

        # Get a sourcefile
        if dataformat == 'DICOM':
            sourcefile = bids.get_dicomfile(source)
        elif dataformat == 'PAR':
            sourcefile = source
        if not sourcefile.name:
            continue

        # Get a matching run from the bidsmap
        datasource = bids.DataSource(sourcefile, {'dcm2niix2bids': options}, dataformat)
        run, match = bids.get_matching_run(datasource, bidsmap, runtime=True)

        # Check if we should ignore this run
        if datasource.datatype in bidsmap['Options']['bidscoin']['ignoretypes']:
            LOGGER.info(f"Leaving out: {source}")
            continue

        # Check if we already know this run
        if not match:
            LOGGER.error(f"Skipping unknown '{datasource.datatype}' run: {sourcefile}\n-> Re-run the bidsmapper and delete {bidsses} to solve this warning")
            continue

        LOGGER.info(f"Processing: {source}")

        # Create the BIDS session/datatype output folder
        if run['bids']['suffix'] in bids.get_derivatives(datasource.datatype):
            outfolder = bidsfolder / 'derivatives' / manufacturer.replace(' ', '') / subid / sesid / datasource.datatype
        else:
            outfolder = bidsses / datasource.datatype
        outfolder.mkdir(parents=True, exist_ok=True)

        # Compose the BIDS filename using the matched run
        bidsname = bids.get_bidsname(subid, sesid, run, runtime=True)
        runindex = run['bids'].get('run', '')
        if runindex.startswith('<<') and runindex.endswith('>>'):
            bidsname = bids.increment_runindex(outfolder, bidsname)
        jsonfiles = [(outfolder / bidsname).with_suffix('.json')]   # List -> Collect the associated json-files (for updating them later) -- possibly > 1

        # Check if file already exists (-> e.g. when a static runindex is used)
        if (outfolder / bidsname).with_suffix('.json').is_file():
            LOGGER.warning(f"{outfolder/bidsname}.* already exists and will be deleted -- check your results carefully!")
            for ext in ('.nii.gz', '.nii', '.json', '.tsv', '.tsv.gz', '.bval', '.bvec'):
                (outfolder / bidsname).with_suffix(ext).unlink(missing_ok=True)

        # Convert physiological log files (dcm2niix can't handle these)
        if run['bids']['suffix'] == 'physio':
            if bids.get_dicomfile(source, 2).name:      # TODO: issue warning or support PAR
                LOGGER.warning(f"Found > 1 DICOM file in {source}, using: {sourcefile}")
            physiodata = physio.readphysio(sourcefile)
            physio.physio2tsv(physiodata, outfolder / bidsname)

        # Convert the source-files in the run folder to nifti's in the BIDS-folder
        else:
            command = '{command} {args} -f "{filename}" -o "{outfolder}" "{source}"'.format(
                command   = options['command'],
                args      = options.get('args', ''),
                filename  = bidsname,
                outfolder = outfolder,
                source    = source)
            if not bidscoin.run_command(command):
                # The command failed; only carry on if it nevertheless produced the expected nifti output
                if not list(outfolder.glob(f"{bidsname}.nii*")):
                    continue
            if list(outfolder.glob(f"{bidsname}a.nii*")):
                LOGGER.warning(f"Unexpected variants of {outfolder/bidsname}* were produced by dcm2niix. Possibly this can be remedied by using the dcm2niix -i option (to ignore derived, localizer and 2D images)")

            # Replace uncropped output image with the cropped one
            if '-x y' in options.get('args', ''):
                for dcm2niixfile in sorted(outfolder.glob(bidsname + '*_Crop_*')):     # e.g. *_Crop_1.nii.gz
                    ext         = ''.join(dcm2niixfile.suffixes)
                    newbidsfile = str(dcm2niixfile).rsplit(ext, 1)[0].rsplit('_Crop_', 1)[0] + ext
                    LOGGER.info(f"Found dcm2niix _Crop_ postfix, replacing original file\n{dcm2niixfile} ->\n{newbidsfile}")
                    dcm2niixfile.replace(newbidsfile)

            # Rename all files that got additional postfixes from dcm2niix. See: https://github.com/rordenlab/dcm2niix/blob/master/FILENAMING.md
            dcm2niixpostfixes = ('_c', '_i', '_Eq', '_real', '_imaginary', '_MoCo', '_t', '_Tilt', '_e', '_ph', '_ADC', '_fieldmaphz')
            dcm2niixfiles     = sorted({dcm2niixfile for dcm2niixpostfix in dcm2niixpostfixes
                                        for dcm2niixfile in outfolder.glob(f"{bidsname}*{dcm2niixpostfix}*.nii*")})
            if not jsonfiles[0].is_file() and dcm2niixfiles:    # Possibly renamed by dcm2niix, e.g. with multi-echo data (but not always for the first echo)
                jsonfiles.pop(0)
            for dcm2niixfile in dcm2niixfiles:
                ext         = ''.join(dcm2niixfile.suffixes)
                postfixes   = str(dcm2niixfile).split(bidsname)[1].rsplit(ext)[0].split('_')[1:]
                newbidsname = dcm2niixfile.name     # Strip the additional postfixes and assign them to bids entities in the for-loop below
                for postfix in postfixes:           # dcm2niix postfixes _c%d, _e%d and _ph (and any combination of these in that order) are for multi-coil data, multi-echo data and phase data

                    # Patch the echo entity in the newbidsname with the dcm2niix echo info
                    # NB: We can't rely on the bids-entity info here because manufacturers can e.g. put multiple echos in one series / run-folder
                    if 'echo' in run['bids'] and postfix.startswith('e'):
                        echonr = f"_{postfix}".replace('_e', '')    # E.g. postfix='e1'
                        if not echonr:
                            echonr = '1'
                        if echonr.isnumeric():
                            newbidsname = bids.insert_bidskeyval(newbidsname, 'echo', echonr.lstrip('0'))   # In contrast to other labels, run and echo labels MUST be integers. Those labels MAY include zero padding, but this is NOT RECOMMENDED to maintain their uniqueness
                        else:
                            LOGGER.error(f"Unexpected postix '{postfix}' found in {dcm2niixfile}")
                            newbidsname = bids.get_bidsvalue(newbidsname, 'dummy', postfix)     # Append the unknown postfix to the acq-label

                    # Patch the phase entity in the newbidsname with the dcm2niix mag/phase info
                    elif 'part' in run['bids'] and postfix in ('ph', 'real', 'imaginary'):      # e.g. part: ['', 'mag', 'phase', 'real', 'imag', 0]
                        if postfix == 'ph':
                            newbidsname = bids.insert_bidskeyval(newbidsname, 'part', 'phase')
                        if postfix == 'real':
                            newbidsname = bids.insert_bidskeyval(newbidsname, 'part', 'real')
                        if postfix == 'imaginary':
                            newbidsname = bids.insert_bidskeyval(newbidsname, 'part', 'imag')

                    # Patch fieldmap images (NB: datatype=='fmap' is too broad, see the fmap.yaml file)
                    elif run['bids']['suffix'] in bids.bidsdatatypes['fmap'][0]['suffixes']:    # i.e. in ('magnitude','magnitude1','magnitude2','phase1','phase2','phasediff','fieldmap'). TODO: Make this robust for future BIDS versions
                        if len(dcm2niixfiles) not in (1, 2, 3, 4):      # Phase / echo data may be stored in the same data source / run folder
                            LOGGER.debug(f"Unknown fieldmap {outfolder/bidsname} for '{postfix}'")
                        # NB: The order of the replacements below matters (earlier ones take precedence over later ones)
                        newbidsname = newbidsname.replace('_magnitude1a', '_magnitude2')        # First catch this potential weird / rare case
                        newbidsname = newbidsname.replace('_magnitude1_pha', '_phase2')         # First catch this potential weird / rare case
                        newbidsname = newbidsname.replace('_magnitude1_e1', '_magnitude1')      # Case 2 = Two phase and magnitude images
                        newbidsname = newbidsname.replace('_magnitude1_e2', '_magnitude2')      # Case 2: This can happen when the e2 image is stored in the same directory as the e1 image, but with the e2 listed first
                        newbidsname = newbidsname.replace('_magnitude2_e1', '_magnitude1')      # Case 2: This can happen when the e2 image is stored in the same directory as the e1 image, but with the e2 listed first
                        newbidsname = newbidsname.replace('_magnitude2_e2', '_magnitude2')      # Case 2
                        if len(dcm2niixfiles) in (2, 3):                                        # Case 1 = One or two magnitude + one phasediff image
                            newbidsname = newbidsname.replace('_magnitude1_ph', '_phasediff')
                            newbidsname = newbidsname.replace('_magnitude2_ph', '_phasediff')
                        newbidsname = newbidsname.replace('_phasediff_e1', '_phasediff')        # Case 1
                        newbidsname = newbidsname.replace('_phasediff_e2', '_phasediff')        # Case 1
                        newbidsname = newbidsname.replace('_phasediff_ph', '_phasediff')        # Case 1
                        newbidsname = newbidsname.replace('_magnitude1_ph', '_phase1')          # Case 2: One or two magnitude and phase images in one folder / datasource
                        newbidsname = newbidsname.replace('_magnitude2_ph', '_phase2')          # Case 2: Two magnitude + two phase images in one folder / datasource
                        newbidsname = newbidsname.replace('_phase1_e1', '_phase1')              # Case 2
                        newbidsname = newbidsname.replace('_phase1_e2', '_phase2')              # Case 2: This can happen when the e2 image is stored in the same directory as the e1 image, but with the e2 listed first
                        newbidsname = newbidsname.replace('_phase2_e1', '_phase1')              # Case 2: This can happen when the e2 image is stored in the same directory as the e1 image, but with the e2 listed first
                        newbidsname = newbidsname.replace('_phase2_e2', '_phase2')              # Case 2
                        newbidsname = newbidsname.replace('_phase1_ph', '_phase1')              # Case 2: One or two magnitude and phase images in one folder / datasource
                        newbidsname = newbidsname.replace('_phase2_ph', '_phase2')              # Case 2: Two magnitude + two phase images in one folder / datasource
                        newbidsname = newbidsname.replace('_magnitude_e1', '_magnitude')        # Case 3 = One magnitude + one fieldmap image
                        if len(dcm2niixfiles) == 2:
                            newbidsname = newbidsname.replace('_fieldmap_e1', '_magnitude')     # Case 3: One magnitude + one fieldmap image in one folder / datasource
                        newbidsname = newbidsname.replace('_fieldmap_e1', '_fieldmap')          # Case 3
                        newbidsname = newbidsname.replace('_magnitude_ph', '_fieldmap')         # Case 3: One magnitude + one fieldmap image in one folder / datasource
                        newbidsname = newbidsname.replace('_fieldmap_ph', '_fieldmap')          # Case 3

                    # Append the dcm2niix info to acq-label, may need to be improved / elaborated for future BIDS standards, supporting multi-coil data
                    else:
                        newbidsname = bids.get_bidsvalue(newbidsname, 'dummy', postfix)

                    # Remove the added postfix from the new bidsname
                    newbidsname = newbidsname.replace(f"_{postfix}_", '_')      # If it is not last
                    newbidsname = newbidsname.replace(f"_{postfix}.", '.')      # If it is last

                    # The ADC images are not BIDS compliant
                    if postfix == 'ADC':
                        LOGGER.warning(f"The {newbidsname} image is most likely not BIDS-compliant -- you can probably delete it safely and update the scants.tsv file")

                # Save the nifti file with a new name
                if runindex.startswith('<<') and runindex.endswith('>>'):
                    newbidsname = bids.increment_runindex(outfolder, newbidsname, '')   # Update the runindex now that the acq-label has changed
                newbidsfile = outfolder / newbidsname
                LOGGER.info(f"Found dcm2niix {postfixes} postfixes, renaming\n{dcm2niixfile} ->\n{newbidsfile}")
                if newbidsfile.is_file():
                    LOGGER.warning(f"Overwriting existing {newbidsfile} file -- check your results carefully!")
                dcm2niixfile.replace(newbidsfile)

                # Rename all associated files (i.e. the json-, bval- and bvec-files)
                oldjsonfile = dcm2niixfile.with_suffix('').with_suffix('.json')
                newjsonfile = newbidsfile.with_suffix('').with_suffix('.json')
                if not oldjsonfile.is_file():
                    LOGGER.warning(f"Unexpected file conversion result: {oldjsonfile} not found")
                else:
                    if oldjsonfile in jsonfiles:
                        jsonfiles.remove(oldjsonfile)
                    if newjsonfile not in jsonfiles:
                        jsonfiles.append(newjsonfile)
                for oldfile in outfolder.glob(dcm2niixfile.with_suffix('').stem + '.*'):
                    oldfile.replace(newjsonfile.with_suffix(''.join(oldfile.suffixes)))

        # Copy over the source meta-data
        metadata = bids.copymetadata(sourcefile, outfolder / bidsname, options.get('meta', []))

        # Loop over and adapt all the newly produced json sidecar-files and write to the scans.tsv file (NB: assumes every nifti-file comes with a json-file)
        for jsonfile in sorted(set(jsonfiles)):

            # Load the json meta-data
            with jsonfile.open('r') as json_fid:
                jsondata = json.load(json_fid)

            # Add all the source meta data to the meta-data
            for metakey, metaval in metadata.items():
                # Only warn if an existing value is really being changed (and never index a key that is absent)
                if metakey in jsondata and jsondata[metakey] != metaval:
                    LOGGER.warning(f"Replacing {metakey} values in {jsonfile}: {jsondata[metakey]} -> {metaval}")
                jsondata[metakey] = metaval

            # Add all the run meta data to the meta-data. NB: the dynamic `IntendedFor` value is handled separately later
            for metakey, metaval in run['meta'].items():
                if metakey != 'IntendedFor':
                    metaval = datasource.dynamicvalue(metaval, cleanup=False, runtime=True)
                    try:
                        metaval = ast.literal_eval(str(metaval))    # Convert stringified python literals (e.g. lists / numbers) back
                    except (ValueError, SyntaxError):
                        pass                                        # Not a python literal -> keep the value as it is
                    LOGGER.info(f"Adding '{metakey}: {metaval}' to: {jsonfile}")
                if not metaval:
                    metaval = None
                jsondata[metakey] = metaval

            # Remove unused (but added from the template) B0FieldIdentifiers/Sources
            if not jsondata.get('B0FieldSource'):
                jsondata.pop('B0FieldSource', None)
            if not jsondata.get('B0FieldIdentifier'):
                jsondata.pop('B0FieldIdentifier', None)

            # Save the meta-data to the json sidecar-file
            with jsonfile.open('w') as json_fid:
                json.dump(jsondata, json_fid, indent=4)

            # Parse the acquisition time from the source header or else from the json file (NB: assuming the source file represents the first acquisition)
            outputfile = [file for file in jsonfile.parent.glob(jsonfile.stem + '.*') if file.suffix in ('.nii', '.gz')]   # Find the corresponding nifti/tsv.gz file (there should be only one, let's not make assumptions about the .gz extension)
            if not outputfile:
                # NB: Not `LOGGER.exception`, there is no active exception here
                LOGGER.error(f"No data-file found with {jsonfile} when updating {scans_tsv}")
            elif datasource.datatype not in bidsmap['Options']['bidscoin']['bidsignore'] and run['bids']['suffix'] not in bids.get_derivatives(datasource.datatype):
                acq_time = ''
                if dataformat == 'DICOM':
                    acq_time = f"{datasource.attributes('AcquisitionDate')}T{datasource.attributes('AcquisitionTime')}"
                elif dataformat == 'PAR':
                    acq_time = datasource.attributes('exam_date')
                if not acq_time or acq_time == 'T':
                    acq_time = f"1925-01-01T{jsondata.get('AcquisitionTime','')}"
                try:
                    acq_time = dateutil.parser.parse(acq_time)
                    if options.get('anon', 'y') in ('y', 'yes'):
                        acq_time = acq_time.replace(year=1925, month=1, day=1)      # Privacy protection (see BIDS specification)
                    acq_time = acq_time.isoformat()
                except Exception as jsonerror:      # Deliberately best-effort: an unparsable time must not abort the conversion
                    LOGGER.warning(f"Could not parse the acquisition time from: {sourcefile}\n{jsonerror}")
                    acq_time = 'n/a'
                scanpath = outputfile[0].relative_to(bidsses)
                scans_table.loc[scanpath.as_posix(), 'acq_time'] = acq_time

    # Write the scans_table to disk
    LOGGER.info(f"Writing acquisition time data to: {scans_tsv}")
    scans_table.sort_values(by=['acq_time', 'filename'], inplace=True)
    scans_table.replace('', 'n/a').to_csv(scans_tsv, sep='\t', encoding='utf-8', na_rep='n/a')

    # Collect personal data from a source header (PAR/XML does not contain personal info)
    personals = {}
    if sesid and 'session_id' not in personals:
        personals['session_id'] = sesid
    personals['age'] = ''
    if dataformat == 'DICOM':
        age = datasource.attributes('PatientAge')   # A string of characters with one of the following formats: nnnD, nnnW, nnnM, nnnY
        try:
            if age.endswith('D'):
                age = float(age.rstrip('D')) / 365.2524
            elif age.endswith('W'):
                age = float(age.rstrip('W')) / 52.1775
            elif age.endswith('M'):
                age = float(age.rstrip('M')) / 12
            elif age.endswith('Y'):
                age = float(age.rstrip('Y'))
            if age:
                if options.get('anon', 'y') in ('y', 'yes'):
                    age = int(float(age))
                personals['age'] = str(age)
        except (ValueError, OverflowError):
            # A malformed header value must not prevent the participants.tsv file from being written
            LOGGER.warning(f"Could not parse the PatientAge '{age}' in: {session}")
        personals['sex']    = datasource.attributes('PatientSex')
        personals['size']   = datasource.attributes('PatientSize')
        personals['weight'] = datasource.attributes('PatientWeight')

    # Store the collected personals in the participants_table
    participants_tsv = bidsfolder / 'participants.tsv'
    if participants_tsv.is_file():
        participants_table = pd.read_csv(participants_tsv, sep='\t', dtype=str)
        participants_table.set_index(['participant_id'], verify_integrity=True, inplace=True)
    else:
        participants_table = pd.DataFrame()
        participants_table.index.name = 'participant_id'
    if subid in participants_table.index and 'session_id' in participants_table.keys() and participants_table.loc[subid, 'session_id']:
        return                                      # Only take data from the first session -> BIDS specification
    for key in personals:                           # TODO: Check that only values that are consistent over sessions go in the participants.tsv file, otherwise put them in a sessions.tsv file
        if key not in participants_table or participants_table[key].isnull().get(subid, True) or participants_table[key].get(subid) == 'n/a':
            participants_table.loc[subid, key] = personals[key]

    # Write the collected data to the participants tsv-file
    LOGGER.info(f"Writing {subid} subject data to: {participants_tsv}")
    participants_table.replace('', 'n/a').to_csv(participants_tsv, sep='\t', encoding='utf-8', na_rep='n/a')
def bidsmapper_plugin(session: Path, bidsmap_new: dict, bidsmap_old: dict, template: dict, store: dict) -> None:
    """
    All the logic to map the DICOM/PAR source fields onto bids labels go into this function

    For every source file in the session a matching run is searched for, first in `bidsmap_old` and then in the
    `template`. Runs that are not yet present in `bidsmap_new` are appended to it (in place). If a `store` is given,
    the source file is first copied to the store and the copy is registered as the provenance of the run.

    :param session:     The full-path name of the subject/session raw data source folder
    :param bidsmap_new: The new study bidsmap that we are building
    :param bidsmap_old: The previous study bidsmap that has precedence over the template bidsmap
    :param template:    The template bidsmap with the default heuristics
    :param store:       The paths of the source- and target-folder
    :return:
    """

    # Get started
    plugin     = {'dcm2niix2bids': bidsmap_new['Options']['plugins']['dcm2niix2bids']}
    datasource = bids.get_datasource(session, plugin)
    dataformat = datasource.dataformat
    if not dataformat:
        return

    # Collect the different DICOM/PAR source files for all runs in the session
    sourcefiles = []
    if dataformat == 'DICOM':
        for sourcedir in bidscoin.lsdirs(session):
            for n in range(1):      # Option: Use range(2) to scan two files and catch e.g. magnitude1/2 fieldmap files that are stored in one Series folder (but bidscoiner sees only the first file anyhow and it makes bidsmapper 2x slower :-()
                sourcefile = bids.get_dicomfile(sourcedir, n)
                if sourcefile.name:
                    sourcefiles.append(sourcefile)
    elif dataformat == 'PAR':
        sourcefiles = bids.get_parfiles(session)
    else:
        # NB: Not `LOGGER.exception`, there is no active exception here (it would log a bogus "NoneType: None" traceback)
        LOGGER.error(f"Unsupported dataformat '{dataformat}'")

    # Update the bidsmap with the info from the source files
    for sourcefile in sourcefiles:

        # Input checks. NB: `.get` is used such that a missing dataformat section is reported instead of raising a KeyError
        if not sourcefile.name or (not template.get(dataformat) and not bidsmap_old.get(dataformat)):
            LOGGER.error(f"No {dataformat} source information found in the bidsmap and template for: {sourcefile}")
            return

        # See if we can find a matching run in the old bidsmap
        datasource = bids.DataSource(sourcefile, plugin, dataformat)
        run, match = bids.get_matching_run(datasource, bidsmap_old)

        # If not, see if we can find a matching run in the template
        if not match:
            run, _ = bids.get_matching_run(datasource, template)

        # See if we have collected the run somewhere in our new bidsmap
        if not bids.exist_run(bidsmap_new, '', run):

            # Communicate with the user if the run was not present in bidsmap_old or in template, i.e. that we found a new sample
            if not match:
                LOGGER.info(f"Discovered '{datasource.datatype}' {dataformat} sample: {sourcefile}")

            # Now work from the provenance store
            if store:
                targetfile = store['target'] / sourcefile.relative_to(store['source'])
                targetfile.parent.mkdir(parents=True, exist_ok=True)
                run['provenance'] = str(shutil.copy2(sourcefile, targetfile))

            # Copy the filled-in run over to the new bidsmap
            bids.append_run(bidsmap_new, run)

        else:
            # Communicate with the user if the run was already present in bidsmap_old or in template
            LOGGER.debug(f"Known '{datasource.datatype}' {dataformat} sample: {sourcefile}")
def coin_dicom(session: Path, bidsmap: dict, bidsfolder: Path, personals: dict, subprefix: str, sesprefix: str) -> None:
    """
    Converts the session dicom-files into BIDS-valid nifti-files in the corresponding bidsfolder and
    extracts personals (e.g. Age, Sex) from the dicom header

    Every run subfolder of the session is matched against the bidsmap and converted with dcm2niix, the files that
    got a dcm2niix suffix (_c, _e, _ph, _i) are renamed, the json sidecar-files are updated (TaskName, EchoTime1/2,
    IntendedFor) and the acquisition times are written to the session scans.tsv file.

    :param session:     The full-path name of the subject/session source folder
    :param bidsmap:     The full mapping heuristics from the bidsmap YAML-file
    :param bidsfolder:  The full-path name of the BIDS root-folder
    :param personals:   The dictionary with the personal information (updated in place)
    :param subprefix:   The prefix common for all source subject-folders
    :param sesprefix:   The prefix common for all source session-folders
    :return:            Nothing
    """

    runfolders = bids.lsdirs(session)
    if not runfolders:
        LOGGER.warning(f"No run subfolder(s) found in: {session}")
        return

    TE = [None, None]       # The echo times of the (first two) magnitude images, needed for the phasediff json-file

    # Get valid BIDS subject/session identifiers from the (first) dicom-header or from the session source folder
    subid, sesid = bids.get_subid_sesid(bids.get_dicomfile(runfolders[0]),
                                        bidsmap['DICOM']['subject'],
                                        bidsmap['DICOM']['session'],
                                        subprefix, sesprefix)
    if subid == subprefix:
        LOGGER.error(f"No valid subject identifier found for: {session}")
        return

    # Create the BIDS session-folder and a scans.tsv file
    bidsses = bidsfolder / subid / sesid
    if bidsses.is_dir():
        LOGGER.warning(f"Existing BIDS output-directory found, which may result in duplicate data (with increased run-index). Make sure {bidsses} was cleaned-up from old data before (re)running the bidscoiner")
    bidsses.mkdir(parents=True, exist_ok=True)
    scans_tsv = bidsses / f"{subid}{bids.add_prefix('_',sesid)}_scans.tsv"
    if scans_tsv.is_file():
        scans_table = pd.read_csv(scans_tsv, sep='\t', index_col='filename')
    else:
        scans_table = pd.DataFrame(columns=['acq_time'], dtype='str')
        scans_table.index.name = 'filename'

    # Process all the dicom run subfolders
    runfolder = None        # Remember the last run subfolder for collecting the personal data below
    for runfolder in runfolders:

        # Get a dicom-file
        dicomfile = bids.get_dicomfile(runfolder)
        if not dicomfile.name:
            continue

        # Get a matching run from the bidsmap
        run, modality, index = bids.get_matching_run(dicomfile, bidsmap)

        # Check if we should ignore this run
        if modality == bids.ignoremodality:
            LOGGER.info(f"Leaving out: {runfolder}")
            continue

        # Check if we already know this run
        if index is None:
            LOGGER.warning(f"Skipping unknown '{modality}': {dicomfile}\n-> re-run the bidsmapper and delete {session} to solve this warning")
            continue

        LOGGER.info(f"Processing: {runfolder}")

        # Create the BIDS session/modality folder
        bidsmodality = bidsses / modality
        bidsmodality.mkdir(parents=True, exist_ok=True)

        # Compose the BIDS filename using the matched run
        bidsname = bids.get_bidsname(subid, sesid, modality, run)
        runindex = run['bids']['run']
        if runindex.startswith('<<') and runindex.endswith('>>'):
            bidsname = bids.increment_runindex(bidsmodality, bidsname)

        # Check if file already exists (-> e.g. when a static runindex is used)
        if (bidsmodality / bidsname).with_suffix('.json').is_file():
            LOGGER.warning(f"{bidsmodality/bidsname}.* already exists -- check your results carefully!")

        # Convert the dicom-files in the run folder to nifti's in the BIDS-folder
        command = '{path}dcm2niix {args} -f "{filename}" -o "{outfolder}" "{infolder}"'.format(
            path      = bidsmap['Options']['dcm2niix']['path'],
            args      = bidsmap['Options']['dcm2niix']['args'],
            filename  = bidsname,
            outfolder = bidsmodality,
            infolder  = runfolder)
        if not bids.run_command(command):
            continue

        # Replace uncropped output image with the cropped one
        if '-x y' in bidsmap['Options']['dcm2niix']['args']:
            for filename in sorted(bidsmodality.glob(bidsname + '*_Crop_*')):      # e.g. *_Crop_1.nii.gz
                ext         = ''.join(filename.suffixes)
                newfilename = str(filename).rsplit(ext, 1)[0].rsplit('_Crop_', 1)[0] + ext
                LOGGER.info(f"Found dcm2niix _Crop_ suffix, replacing original file\n{filename} ->\n{newfilename}")
                filename.replace(newfilename)

        # Rename all files ending with _c%d, _e%d and _ph (and any combination of these): These are produced by dcm2niix for multi-coil data, multi-echo data and phase data, respectively
        jsonfiles = []      # Collect the associated json-files (for updating them later) -- possibly > 1
        for dcm2niisuffix in ('_c', '_e', '_ph', '_i'):
            for filename in sorted(bidsmodality.glob(bidsname + dcm2niisuffix + '*')):
                ext             = ''.join(filename.suffixes)
                basepath, index = str(filename).rsplit(ext, 1)[0].rsplit(dcm2niisuffix, 1)     # basepath = the name without the added stuff (i.e. bidsmodality/bidsname), index = added dcm2niix index (e.g. _c1 -> index=1)
                basesuffix      = basepath.rsplit('_', 1)[1]                                   # The BIDS suffix, e.g. basepath = *_magnitude1 -> basesuffix=magnitude1
                index           = index.split('_')[0].zfill(2)                                 # Zero padd as specified in the BIDS-standard (assuming two digits is sufficient); strip following suffices (fieldmaps produce *_e2_ph files)

                # This is a special hack: dcm2niix does not always add a _c/_e suffix for the first(?) coil/echo image -> add it when we encounter a **_e2/_c2 file
                if dcm2niisuffix in ('_c', '_e') and int(index) == 2 and basesuffix not in ['magnitude1', 'phase1']:   # For fieldmaps: *_magnitude1_e[index] -> *_magnitude[index] (This is handled below)
                    filename_ce = Path(basepath + ext)      # The file without the _c1/_e1 suffix
                    if dcm2niisuffix == '_e' and bids.get_bidsvalue(basepath, 'echo'):
                        newbasepath_ce = Path(bids.get_bidsvalue(basepath, 'echo', '1'))
                    else:
                        newbasepath_ce = Path(bids.get_bidsvalue(basepath, 'dummy', dcm2niisuffix.upper() + '1'.zfill(len(index))))    # --> append to acq-label, may need to be elaborated for future BIDS standards, supporting multi-coil data
                    newfilename_ce = newbasepath_ce.with_suffix(ext)    # The file as it should have been
                    if filename_ce.is_file():
                        if filename_ce != newfilename_ce:
                            LOGGER.info(f"Found no dcm2niix {dcm2niisuffix} suffix for image instance 1, renaming\n{filename_ce} ->\n{newfilename_ce}")
                            filename_ce.replace(newfilename_ce)
                        if ext == '.json':
                            jsonfiles.append(newbasepath_ce.with_suffix('.json'))

                # Patch the basepath with the dcm2niix suffix info (we can't rely on the basepath info here because Siemens can e.g. put multiple echos in one series / run-folder)
                if dcm2niisuffix == '_e' and bids.get_bidsvalue(basepath, 'echo') and index:
                    basepath = bids.get_bidsvalue(basepath, 'echo', str(int(index)))    # In contrast to other labels, run and echo labels MUST be integers. Those labels MAY include zero padding, but this is NOT RECOMMENDED to maintain their uniqueness

                elif dcm2niisuffix == '_e' and basesuffix in ('magnitude1', 'magnitude2') and index:    # i.e. modality == 'fmap'
                    basepath = basepath[0:-1] + str(int(index))                                        # basepath: *_magnitude1_e[index] -> *_magnitude[index]
                    # Collect the echo times that need to be added to the json-file (see below)
                    if filename.suffix == '.json':
                        with filename.open('r') as json_fid:
                            data = json.load(json_fid)
                        TE[int(index) - 1] = data['EchoTime']
                        LOGGER.info(f"Collected EchoTime{index} = {data['EchoTime']} from: {filename}")

                elif dcm2niisuffix == '_e' and basesuffix == 'phasediff' and index:                     # i.e. modality == 'fmap'
                    pass

                elif dcm2niisuffix == '_e' and basesuffix in ['phase1', 'phase2'] and index:            # i.e. modality == 'fmap'
                    basepath = basepath[0:-1] + str(int(index))                                        # basepath: *_phase1_e[index]_ph -> *_phase[index]

                else:
                    basepath = bids.get_bidsvalue(basepath, 'dummy', dcm2niisuffix.upper() + index)     # --> append to acq-label, may need to be elaborated for future BIDS standards, supporting multi-coil data

                # Save the file with a new name
                newbidsname = str(Path(basepath).name)
                if runindex.startswith('<<') and runindex.endswith('>>'):
                    newbidsname = bids.increment_runindex(bidsmodality, newbidsname, ext)               # Update the runindex now that the acq-label has changed
                newfilename = (bidsmodality / newbidsname).with_suffix(ext)
                LOGGER.info(f"Found dcm2niix {dcm2niisuffix} suffix, renaming\n{filename} ->\n{newfilename}")
                filename.replace(newfilename)
                if ext == '.json':
                    jsonfiles.append((bidsmodality / newbidsname).with_suffix('.json'))

        # Loop over and adapt all the newly produced json files and write to the scans.tsv file (every nifti-file comes with a json-file)
        if not jsonfiles:
            jsonfiles = [(bidsmodality / bidsname).with_suffix('.json')]
        for jsonfile in set(jsonfiles):

            # Check if dcm2niix behaved as expected
            if not jsonfile.is_file():
                LOGGER.error(f"Unexpected file conversion result: {jsonfile} not found")
                continue

            # Add a dummy b0 bval- and bvec-file for any file without a bval/bvec file (e.g. sbref, b0 scans)
            if modality == 'dwi':
                bvecfile = jsonfile.with_suffix('.bvec')
                bvalfile = jsonfile.with_suffix('.bval')
                if not bvecfile.is_file():
                    LOGGER.info(f"Adding dummy bvec file: {bvecfile}")
                    with bvecfile.open('w') as bvec_fid:
                        bvec_fid.write('0\n0\n0\n')
                if not bvalfile.is_file():
                    LOGGER.info(f"Adding dummy bval file: {bvalfile}")
                    with bvalfile.open('w') as bval_fid:
                        bval_fid.write('0\n')

            # Add the TaskName to the func json-file
            elif modality == 'func':
                with jsonfile.open('r') as json_fid:
                    data = json.load(json_fid)
                if 'TaskName' not in data:
                    LOGGER.info(f"Adding TaskName to: {jsonfile}")
                    data['TaskName'] = run['bids']['task']
                    with jsonfile.open('w') as json_fid:
                        json.dump(data, json_fid, indent=4)

            # Add the EchoTime(s) used to create the difference image to the fmap json-file. NB: This assumes the magnitude runs have already been parsed (i.e. their nifti's had an _e suffix) -- This is normally the case for Siemens (phase-runs being saved after the magnitude runs
            elif modality == 'fmap':
                if run['bids']['suffix'] == 'phasediff':
                    LOGGER.info(f"Adding EchoTime1: {TE[0]} and EchoTime2: {TE[1]} to {jsonfile}")
                    if TE[0] is None or TE[1] is None:
                        LOGGER.warning(f"Missing Echo-Time data for: {jsonfile}")
                    elif TE[0] > TE[1]:
                        LOGGER.warning(f"EchoTime1 > EchoTime2 for: {jsonfile}")
                    with jsonfile.open('r') as json_fid:
                        data = json.load(json_fid)
                    data['EchoTime1'] = TE[0]
                    data['EchoTime2'] = TE[1]
                    with jsonfile.open('w') as json_fid:
                        json.dump(data, json_fid, indent=4)

            # Find the corresponding nifti file (there should be only one, let's not make assumptions about the .gz extension)
            niftifiles = list(jsonfile.parent.glob(jsonfile.stem + '.nii*'))
            if not niftifiles:
                # Don't abort the whole session (and lose the scans.tsv data collected so far) for one missing file
                LOGGER.error(f"No nifti-file found with {jsonfile} when updating {scans_tsv}")
                continue
            scanpath = niftifiles[0].relative_to(bidsses)

            # Parse the acquisition time from the json file or else from the dicom header (NB: assuming the dicom file represents the first aqcuisition)
            with jsonfile.open('r') as json_fid:
                data = json.load(json_fid)
            if 'AcquisitionTime' not in data:
                data['AcquisitionTime'] = bids.get_dicomfield('AcquisitionTime', dicomfile)
            try:
                acq_time = '1900-01-01T' + dateutil.parser.parse(data['AcquisitionTime']).strftime('%H:%M:%S')
            except (ValueError, OverflowError, TypeError) as timeerror:     # E.g. an empty or malformed time string
                LOGGER.warning(f"Could not parse the acquisition time from: {dicomfile}\n{timeerror}")
                acq_time = 'n/a'
            scans_table.loc[scanpath.as_posix(), 'acq_time'] = acq_time

    # Write the scans_table to disk
    LOGGER.info(f"Writing acquisition time data to: {scans_tsv}")
    scans_table.sort_values(by=['acq_time', 'filename'], inplace=True)
    scans_table.to_csv(scans_tsv, sep='\t', encoding='utf-8')

    # Search for the IntendedFor images and add them to the json-files. This has been postponed untill all modalities have been processed (i.e. so that all target images are indeed on disk)
    if bidsmap['DICOM']['fmap'] is not None:
        for fieldmap in bidsmap['DICOM']['fmap']:
            bidsname    = bids.get_bidsname(subid, sesid, 'fmap', fieldmap)
            niifiles    = []
            intendedfor = fieldmap['bids']['IntendedFor']

            # Search for the imaging files that match the IntendedFor search criteria
            if intendedfor:
                # NB: Test the type first, a list has no `startswith` method
                if isinstance(intendedfor, str):
                    if intendedfor.startswith('<<') and intendedfor.endswith('>>'):
                        intendedfor = intendedfor[2:-2].split('><')
                    else:
                        intendedfor = [intendedfor]
                elif not isinstance(intendedfor, list):
                    intendedfor = [intendedfor]
                for selector in intendedfor:
                    niifiles.extend([Path(niifile).relative_to(bidsfolder / subid)
                                     for niifile in sorted(bidsses.rglob(f"*{selector}*.nii*")) if selector])   # Search in all runs using a relative path to the subject folder
            else:
                intendedfor = []

            # Save the IntendedFor data in the json-files (account for multiple runs and dcm2niix suffixes inserted into the acquisition label)
            acqlabel      = bids.get_bidsvalue(bidsname, 'acq')
            runpattern    = bidsname.replace('_run-1_', '_run-[0-9]*_')
            fmapjsonfiles = list((bidsses / 'fmap').glob(runpattern + '.json'))
            if acqlabel:    # NB: str.replace('', x) would insert x between all characters and yield a nonsense search pattern
                fmapjsonfiles += list((bidsses / 'fmap').glob(runpattern.replace(acqlabel, acqlabel + '[CE][0-9]*') + '.json'))
            for jsonfile in fmapjsonfiles:

                if niifiles:
                    LOGGER.info(f"Adding IntendedFor to: {jsonfile}")
                elif intendedfor:
                    LOGGER.warning(f"Empty 'IntendedFor' fieldmap value in {jsonfile}: the search for {intendedfor} gave no results")
                else:
                    LOGGER.warning(f"Empty 'IntendedFor' fieldmap value in {jsonfile}: the IntendedFor value of the bidsmap entry was empty")

                with jsonfile.open('r') as json_fid:
                    data = json.load(json_fid)
                data['IntendedFor'] = [niifile.as_posix() for niifile in niifiles]     # The path needs to use forward slashes instead of backward slashes
                with jsonfile.open('w') as json_fid:
                    json.dump(data, json_fid, indent=4)

                # Catch magnitude2 and phase2 files produced by dcm2niix (i.e. magnitude1 & magnitude2 both in the same runfolder)
                if jsonfile.name.endswith(('magnitude1.json', 'phase1.json')):
                    jsonfile2 = jsonfile.with_name(jsonfile.name.rsplit('1.json', 1)[0] + '2.json')
                    if jsonfile2.is_file():
                        with jsonfile2.open('r') as json_fid:
                            data = json.load(json_fid)
                        if 'IntendedFor' not in data:
                            if niifiles:
                                LOGGER.info(f"Adding IntendedFor to: {jsonfile2}")
                            else:
                                LOGGER.warning(f"Empty 'IntendedFor' fieldmap value in {jsonfile2}: the search for {intendedfor} gave no results")
                            data['IntendedFor'] = [niifile.as_posix() for niifile in niifiles]     # The path needs to use forward slashes instead of backward slashes
                            with jsonfile2.open('w') as json_fid:
                                json.dump(data, json_fid, indent=4)

    # Collect personal data from the DICOM header: only from the first session (-> BIDS specification)
    if runfolder is not None:
        dicomfile = bids.get_dicomfile(runfolder)
        personals['participant_id'] = subid
        if sesid:
            if 'session_id' not in personals:
                personals['session_id'] = sesid
            else:
                return
        age = bids.get_dicomfield('PatientAge', dicomfile)      # A string of characters with one of the following formats: nnnD, nnnW, nnnM, nnnY
        try:
            if age.endswith('D'):
                personals['age'] = str(int(float(age.rstrip('D')) / 365.2524))
            elif age.endswith('W'):
                personals['age'] = str(int(float(age.rstrip('W')) / 52.1775))
            elif age.endswith('M'):
                personals['age'] = str(int(float(age.rstrip('M')) / 12))
            elif age.endswith('Y'):
                personals['age'] = str(int(float(age.rstrip('Y'))))
            elif age:
                personals['age'] = age
        except (ValueError, OverflowError):
            # A malformed header value must not prevent the other personals from being collected
            LOGGER.warning(f"Could not parse the PatientAge '{age}' from: {dicomfile}")
        personals['sex']    = bids.get_dicomfield('PatientSex', dicomfile)
        personals['size']   = bids.get_dicomfield('PatientSize', dicomfile)
        personals['weight'] = bids.get_dicomfield('PatientWeight', dicomfile)
def coin_data2bids(dataformat: str, session: Path, bidsmap: dict, bidsfolder: Path, personals: dict, subprefix: str, sesprefix: str) -> None:
    """
    Converts the session source-files into BIDS-valid nifti-files in the corresponding bidsfolder and
    extracts personals (e.g. Age, Sex) from the source header

    The function works in four stages:
    1. Derive the subject/session identifiers from the first usable source file
    2. Convert every source / run folder (physio logs via `physio`, everything else via dcm2niix), rename
       the dcm2niix postfixed output files and register the acquisition times in the scans.tsv file
    3. Add the IntendedFor and EchoTime1/2 meta-data to the fieldmap json sidecar files
    4. Collect the personal data from the DICOM header (DICOM only)

    :param dataformat:  The format of the raw input data that is to be coined (e.g. 'DICOM' or 'PAR', see bids.get_dataformat)
    :param session:     The full-path name of the subject/session source file/folder
    :param bidsmap:     The full mapping heuristics from the bidsmap YAML-file
    :param bidsfolder:  The full-path name of the BIDS root-folder
    :param personals:   The dictionary with the personal information (updated in place)
    :param subprefix:   The prefix common for all source subject-folders
    :param sesprefix:   The prefix common for all source session-folders
    :return:            Nothing
    """

    # Get valid BIDS subject/session identifiers from the (first) DICOM- or PAR/XML source file
    if dataformat == 'DICOM':
        sourcefile = Path()
        sources = bids.lsdirs(session)
        for source in sources:
            sourcefile = bids.get_dicomfile(source)
            manufacturer = bids.get_dicomfield('Manufacturer', sourcefile)
            if sourcefile.name:
                break

    elif dataformat == 'PAR':
        sources = bids.get_parfiles(session)
        manufacturer = 'Philips Medical Systems'
        if sources:
            sourcefile = sources[0]

    else:
        LOGGER.error(f"Unsupported data format: {dataformat}\nPlease report this bug")
        return

    if not sources:
        LOGGER.info(f"No data found for: {session}")
        return

    subid, sesid = bids.get_subid_sesid(sourcefile,
                                        bidsmap[dataformat]['subject'],
                                        bidsmap[dataformat]['session'],
                                        subprefix, sesprefix)

    if subid == subprefix:
        LOGGER.error(f"No valid subject identifier found for: {session}")
        return

    # Create the BIDS session-folder and a scans.tsv file
    bidsses = bidsfolder / subid / sesid
    if bidsses.is_dir():
        LOGGER.warning(f"Existing BIDS output-directory found, which may result in duplicate data (with increased run-index). Make sure {bidsses} was cleaned-up from old data before (re)running the bidscoiner")
    bidsses.mkdir(parents=True, exist_ok=True)
    scans_tsv = bidsses / f"{subid}{bids.add_prefix('_',sesid)}_scans.tsv"
    if scans_tsv.is_file():
        scans_table = pd.read_csv(scans_tsv, sep='\t', index_col='filename')
    else:
        scans_table = pd.DataFrame(columns=['acq_time'], dtype='str')
        scans_table.index.name = 'filename'

    # Process all the source files or run subfolders
    for source in sources:

        # Get a source-file
        if dataformat == 'DICOM':
            sourcefile = bids.get_dicomfile(source)
        elif dataformat == 'PAR':
            sourcefile = source
        if not sourcefile.name:
            continue

        # Get a matching run from the bidsmap
        run, datatype, index = bids.get_matching_run(sourcefile, bidsmap, dataformat)

        # Check if we should ignore this run
        if datatype == bids.ignoredatatype:
            LOGGER.info(f"Leaving out: {source}")
            continue

        # Check if we already know this run
        if index is None:
            LOGGER.error(f"Skipping unknown '{datatype}' run: {sourcefile}\n-> Re-run the bidsmapper and delete {bidsses} to solve this warning")
            continue

        LOGGER.info(f"Processing: {source}")

        # Create the BIDS session/datatype output folder
        if run['bids']['suffix'] in bids.get_derivatives(datatype):
            outfolder = bidsfolder / 'derivatives' / manufacturer.replace(' ', '') / subid / sesid / datatype
        else:
            outfolder = bidsses / datatype
        outfolder.mkdir(parents=True, exist_ok=True)

        # Compose the BIDS filename using the matched run
        bidsname = bids.get_bidsname(subid, sesid, run)
        runindex = run['bids'].get('run', '')
        if runindex.startswith('<<') and runindex.endswith('>>'):
            bidsname = bids.increment_runindex(outfolder, bidsname)
        jsonfiles = [(outfolder / bidsname).with_suffix('.json')]   # List -> Collect the associated json-files (for updating them later) -- possibly > 1

        # Check if file already exists (-> e.g. when a static runindex is used)
        if (outfolder / bidsname).with_suffix('.json').is_file():
            LOGGER.warning(f"{outfolder/bidsname}.* already exists and will be deleted -- check your results carefully!")
            # NB: Every suffix must start with a dot, otherwise Path.with_suffix() raises a ValueError
            for ext in ('.nii.gz', '.nii', '.json', '.bval', '.bvec', '.tsv.gz'):
                (outfolder / bidsname).with_suffix(ext).unlink(missing_ok=True)

        # Convert physiological log files (dcm2niix can't handle these)
        if run['bids']['suffix'] == 'physio':
            if bids.get_dicomfile(source, 2).name:
                LOGGER.warning(f"Found > 1 DICOM file in {source}, using: {sourcefile}")
            physiodata = physio.readphysio(sourcefile)
            physio.physio2tsv(physiodata, outfolder / bidsname)

        # Convert the source-files in the run folder to nifti's in the BIDS-folder
        else:
            command = '{path}dcm2niix {args} -f "{filename}" -o "{outfolder}" "{source}"'.format(
                path=bidsmap['Options']['dcm2niix']['path'],
                args=bidsmap['Options']['dcm2niix']['args'],
                filename=bidsname,
                outfolder=outfolder,
                source=source)
            if not bids.run_command(command):
                continue

            # Replace uncropped output image with the cropped one
            if '-x y' in bidsmap['Options']['dcm2niix']['args']:
                for dcm2niixfile in sorted(outfolder.glob(bidsname + '*_Crop_*')):      # e.g. *_Crop_1.nii.gz
                    ext = ''.join(dcm2niixfile.suffixes)
                    newbidsfile = str(dcm2niixfile).rsplit(ext, 1)[0].rsplit('_Crop_', 1)[0] + ext
                    LOGGER.info(f"Found dcm2niix _Crop_ postfix, replacing original file\n{dcm2niixfile} ->\n{newbidsfile}")
                    dcm2niixfile.replace(newbidsfile)

            # Rename all files that got additional postfixes from dcm2niix. See: https://github.com/rordenlab/dcm2niix/blob/master/FILENAMING.md
            dcm2niixpostfixes = ('_c', '_i', '_Eq', '_real', '_imaginary', '_MoCo', '_t', '_Tilt', '_e', '_ph')
            dcm2niixfiles = sorted({dcm2niixfile
                                    for dcm2niixpostfix in dcm2niixpostfixes
                                    for dcm2niixfile in outfolder.glob(f"{bidsname}*{dcm2niixpostfix}*")})
            for dcm2niixfile in dcm2niixfiles:
                ext = ''.join(dcm2niixfile.suffixes)
                postfixes = str(dcm2niixfile).split(bidsname)[1].rsplit(ext)[0].split('_')[1:]
                newbidsname = dcm2niixfile.name     # Strip the additional postfixes and assign them to bids entities in the for-loop below
                for postfix in postfixes:           # dcm2niix postfixes _c%d, _e%d and _ph (and any combination of these in that order) are for multi-coil data, multi-echo data and phase data

                    # Patch the echo entity in the newbidsname with the dcm2niix echo info
                    # NB: We can't rely on the bids-entity info here because manufacturers can e.g. put multiple echos in one series / run-folder
                    if postfix[0] == 'e' and bids.get_bidsvalue(newbidsname, 'echo'):   # NB: Check if postfix[0]=='e' uniquely refers to the right dcm2niixpostfix
                        echonr = f"_{postfix}"                                          # E.g. echonr='_e1' or echonr='_pha'
                        for dcm2niixpostfix in dcm2niixpostfixes:
                            echonr = echonr.replace(dcm2niixpostfix, '')                # Strip the dcm2niixpostfix to keep the echonr info. E.g. [echonr='_e1' or echonr='_pha'] -> [echonr='1' or echonr='a']
                        if echonr.isalpha():
                            echonr = ord(echonr) - 95                                   # dcm2niix adds an alphabetically ordered character if it outputs more than one image with the same name. Convert character to echo-number: '' -> 1, 'a'->2, etc
                        elif not echonr:
                            echonr = 1
                        newbidsname = bids.get_bidsvalue(newbidsname, 'echo', str(echonr))  # In contrast to other labels, run and echo labels MUST be integers. Those labels MAY include zero padding, but this is NOT RECOMMENDED to maintain their uniqueness

                    # Patch fieldmap images (NB: datatype=='fmap' is too broad, see the fmap.yaml file)
                    elif run['bids']['suffix'] in ('magnitude', 'magnitude1', 'magnitude2', 'phase1', 'phase2', 'phasediff', 'fieldmap'):
                        if len(dcm2niixfiles) not in (0, 2, 4, 6, 8):                   # Phase / echo data may be stored in the same data source / run folder
                            LOGGER.warning(f"Unknown fieldmap {outfolder/bidsname} for '{postfix}'")
                        newbidsname = newbidsname.replace('_fieldmap_ph', '_fieldmap')
                        newbidsname = newbidsname.replace('_magnitude_e1', '_magnitude')
                        newbidsname = newbidsname.replace('_magnitude_ph', '_fieldmap')
                        newbidsname = newbidsname.replace('_magnitude1_e1', '_magnitude1')
                        newbidsname = newbidsname.replace('_magnitude2_e1', '_magnitude1')  # This can happen when the e2 image is stored in the same directory as the e1 image, but with the e2 listed first
                        newbidsname = newbidsname.replace('_magnitude1_e2', '_magnitude2')
                        if len(dcm2niixfiles) == 8:
                            newbidsname = newbidsname.replace('_magnitude1_ph', '_phase1')      # Two magnitude + 2 phase images in one folder / datasource
                        else:
                            newbidsname = newbidsname.replace('_magnitude1_ph', '_phasediff')   # One or two magnitude + 1 phasediff image
                        newbidsname = newbidsname.replace('_magnitude1a', '_magnitude2')
                        newbidsname = newbidsname.replace('_magnitude1_pha', '_phase2')
                        newbidsname = newbidsname.replace('_magnitude2_e2', '_magnitude2')
                        newbidsname = newbidsname.replace('_magnitude2_ph', '_phase2')
                        newbidsname = newbidsname.replace('_phase1_e1', '_phase1')
                        newbidsname = newbidsname.replace('_phase2_e1', '_phase1')          # This can happen when the e2 image is stored in the same directory as the e1 image, but with the e2 listed first
                        newbidsname = newbidsname.replace('_phase1_ph', '_phase1')
                        newbidsname = newbidsname.replace('_phase1_e2', '_phase2')
                        newbidsname = newbidsname.replace('_phase2_e2', '_phase2')
                        newbidsname = newbidsname.replace('_phase2_ph', '_phase2')

                    # Append the dcm2niix info to acq-label, may need to be improved / elaborated for future BIDS standards, supporting multi-coil data
                    else:
                        newbidsname = bids.get_bidsvalue(newbidsname, 'dummy', postfix)

                    # Remove the added postfix from the new bidsname
                    newbidsname = newbidsname.replace(f"_{postfix}_", '_')      # If it is not last
                    newbidsname = newbidsname.replace(f"_{postfix}.", '.')      # If it is last

                # Save the file with a new name
                if runindex.startswith('<<') and runindex.endswith('>>'):
                    newbidsname = bids.increment_runindex(outfolder, newbidsname, '')   # Update the runindex now that the acq-label has changed
                newbidsfile = outfolder / newbidsname
                LOGGER.info(f"Found dcm2niix {postfixes} postfixes, renaming\n{dcm2niixfile} ->\n{newbidsfile}")
                if newbidsfile.is_file():
                    LOGGER.warning(f"Overwriting existing {newbidsfile} file -- check your results carefully!")
                dcm2niixfile.replace(newbidsfile)
                if ext == '.json':
                    oldjsonfile = (outfolder / bidsname).with_suffix('.json')
                    if oldjsonfile in jsonfiles and not oldjsonfile.is_file():
                        jsonfiles.remove((outfolder / bidsname).with_suffix('.json'))
                    jsonfiles.append(newbidsfile)

        # Loop over and adapt all the newly produced json files and write to the scans.tsv file (every nifti-file comes with a json-file)
        for jsonfile in sorted(set(jsonfiles)):

            # Check if dcm2niix behaved as expected
            if not jsonfile.is_file():
                LOGGER.error(f"Unexpected file conversion result: {jsonfile} not found")
                continue

            # Add a dummy b0 bval- and bvec-file for any file without a bval/bvec file (e.g. sbref, b0 scans)
            if datatype == 'dwi':
                bvecfile = jsonfile.with_suffix('.bvec')
                bvalfile = jsonfile.with_suffix('.bval')
                if not bvecfile.is_file():
                    LOGGER.info(f"Adding dummy bvec file: {bvecfile}")
                    with bvecfile.open('w') as bvec_fid:
                        bvec_fid.write('0\n0\n0\n')
                if not bvalfile.is_file():
                    LOGGER.info(f"Adding dummy bval file: {bvalfile}")
                    with bvalfile.open('w') as bval_fid:
                        bval_fid.write('0\n')

            # Add the TaskName to the func json-file
            elif datatype == 'func':
                with jsonfile.open('r') as json_fid:
                    data = json.load(json_fid)
                if 'TaskName' not in data:
                    LOGGER.info(f"Adding TaskName to: {jsonfile}")
                    data['TaskName'] = run['bids']['task']
                    with jsonfile.open('w') as json_fid:
                        json.dump(data, json_fid, indent=4)

            # Parse the acquisition time from the json file or else from the source header (NB: assuming the source file represents the first acquisition)
            niifile = list(jsonfile.parent.glob(jsonfile.stem + '.nii*'))   # Find the corresponding nifti file (there should be only one, let's not make assumptions about the .gz extension)
            if niifile and datatype not in bidsmap['Options']['bidscoin']['bidsignore'] and run['bids']['suffix'] not in bids.get_derivatives(datatype):
                with jsonfile.open('r') as json_fid:
                    data = json.load(json_fid)
                if 'AcquisitionTime' not in data or not data['AcquisitionTime']:
                    data['AcquisitionTime'] = bids.get_sourcefield('AcquisitionTime', sourcefile)  # DICOM
                if not data['AcquisitionTime']:
                    data['AcquisitionTime'] = bids.get_sourcefield('exam_date', sourcefile)        # PAR/XML
                try:
                    acq_time = dateutil.parser.parse(data['AcquisitionTime'])
                except Exception:   # Best-effort fallback, but do not swallow KeyboardInterrupt / SystemExit like a bare except would
                    LOGGER.warning(f"Could not parse the acquisition time from: '{data['AcquisitionTime']}' in {sourcefile}")
                    acq_time = dateutil.parser.parse('00:00:00')
                scanpath = niifile[0].relative_to(bidsses)
                scans_table.loc[scanpath.as_posix(), 'acq_time'] = '1925-01-01T' + acq_time.strftime('%H:%M:%S')

    # Write the scans_table to disk
    LOGGER.info(f"Writing acquisition time data to: {scans_tsv}")
    scans_table.sort_values(by=['acq_time', 'filename'], inplace=True)
    scans_table.to_csv(scans_tsv, sep='\t', encoding='utf-8')

    # Add IntendedFor and TE1+TE2 meta-data to the fieldmap json-files. This has been postponed until all datatypes have been processed (i.e. so that all target images are indeed on disk)
    if bidsmap[dataformat]['fmap'] is not None:
        for fieldmap in bidsmap[dataformat]['fmap']:
            bidsname = bids.get_bidsname(subid, sesid, fieldmap)
            niifiles = []
            intendedfor = fieldmap['bids']['IntendedFor']

            # Search for the imaging files that match the IntendedFor search criteria
            if intendedfor:
                # NB: Test for a string first, a list value has no startswith() / endswith() methods
                if isinstance(intendedfor, str) and intendedfor.startswith('<<') and intendedfor.endswith('>>'):
                    intendedfor = intendedfor[2:-2].split('><')
                elif not isinstance(intendedfor, list):
                    intendedfor = [intendedfor]
                for selector in intendedfor:
                    niifiles.extend([Path(niifile).relative_to(bidsfolder / subid)
                                     for niifile in sorted(bidsses.rglob(f"*{selector}*.nii*"))
                                     if selector])      # Search in all runs using a relative path to the subject folder
            else:
                intendedfor = []

            # Get the set of json-files (account for multiple runs in one data source and dcm2niix postfixes inserted into the acquisition label)
            jsonfiles = []
            acqlabel = bids.get_bidsvalue(bidsname, 'acq')
            patterns = (bidsname.replace('_run-1_', '_run-[0-9]*_').replace('_magnitude1', '_magnitude*').replace('_magnitude2', '_magnitude*').replace('_phase1', '_phase*').replace('_phase2', '_phase*'),
                        bidsname.replace('_run-1_', '_run-[0-9]*_').replace('_magnitude1', '_phase*').replace('_magnitude2', '_phase*'))
            for pattern in patterns:
                jsonfiles.extend((bidsses / 'fmap').glob(pattern + '.json'))
                if acqlabel:
                    cepattern = bids.get_bidsvalue(pattern, 'acq', acqlabel + '[CE][0-9]*')
                    jsonfiles.extend(list((bidsses / 'fmap').glob(cepattern + '.json')))

            # Save the meta-data in the jsonfiles
            for jsonfile in sorted(set(jsonfiles)):

                # Add the IntendedFor data
                with jsonfile.open('r') as json_fid:
                    data = json.load(json_fid)
                if 'IntendedFor' not in data:
                    if niifiles:
                        LOGGER.info(f"Adding IntendedFor to: {jsonfile}")
                    elif intendedfor:
                        LOGGER.warning(f"Empty 'IntendedFor' fieldmap value in {jsonfile}: the search for {intendedfor} gave no results")
                    else:
                        LOGGER.warning(f"Empty 'IntendedFor' fieldmap value in {jsonfile}: the IntendedFor value of the bidsmap entry was empty")
                    data['IntendedFor'] = [niifile.as_posix() for niifile in niifiles]  # The path needs to use forward slashes instead of backward slashes
                    with jsonfile.open('w') as json_fid:
                        json.dump(data, json_fid, indent=4)

                # Extract the echo times from magnitude1 and magnitude2 and add them to the phasediff json-file
                if jsonfile.name.endswith('phasediff.json'):
                    json_magnitude = [None, None]
                    TE = [None, None]
                    for n in (0, 1):
                        json_magnitude[n] = jsonfile.parent / jsonfile.name.replace('_phasediff', f"_magnitude{n+1}")
                        if not json_magnitude[n].is_file():
                            LOGGER.error(f"Could not find expected magnitude{n+1} image associated with: {jsonfile}")
                        else:
                            with json_magnitude[n].open('r') as json_fid:
                                data = json.load(json_fid)
                            TE[n] = data['EchoTime']
                    if None in TE:
                        LOGGER.error(f"Cannot find and add valid EchoTime1={TE[0]} and EchoTime2={TE[1]} data to: {jsonfile}")
                    elif TE[0] > TE[1]:
                        LOGGER.error(f"Found invalid EchoTime1={TE[0]} > EchoTime2={TE[1]} for: {jsonfile}")
                    else:
                        with jsonfile.open('r') as json_fid:
                            data = json.load(json_fid)
                        data['EchoTime1'] = TE[0]
                        data['EchoTime2'] = TE[1]
                        LOGGER.info(f"Adding EchoTime1: {TE[0]} and EchoTime2: {TE[1]} to {jsonfile}")
                        with jsonfile.open('w') as json_fid:
                            json.dump(data, json_fid, indent=4)

    # Collect personal data from a source header (PAR/XML does not contain personal info)
    if dataformat == 'DICOM' and sourcefile.name:
        personals['participant_id'] = subid
        if sesid:
            if 'session_id' not in personals:
                personals['session_id'] = sesid
            else:
                return                                      # Only take data from the first session -> BIDS specification
        age = bids.get_dicomfield('PatientAge', sourcefile) # A string of characters with one of the following formats: nnnD, nnnW, nnnM, nnnY
        if age.endswith('D'):
            personals['age'] = str(int(float(age.rstrip('D')) / 365.2524))
        elif age.endswith('W'):
            personals['age'] = str(int(float(age.rstrip('W')) / 52.1775))
        elif age.endswith('M'):
            personals['age'] = str(int(float(age.rstrip('M')) / 12))
        elif age.endswith('Y'):
            personals['age'] = str(int(float(age.rstrip('Y'))))
        elif age:
            personals['age'] = age
        personals['sex'] = bids.get_dicomfield('PatientSex', sourcefile)
        personals['size'] = bids.get_dicomfield('PatientSize', sourcefile)
        personals['weight'] = bids.get_dicomfield('PatientWeight', sourcefile)
def bidsmapper_plugin(session: Path, bidsmap_new: dict, bidsmap_old: dict, template: dict, store: dict) -> None:
    """
    All the heuristics spec2nii2bids attributes and properties onto bids labels and meta-data go into this plugin function.
    The function is expected to update / append new runs to the bidsmap_new data structure. The bidsmap options for this
    plugin are stored in:

    bidsmap_new['Options']['plugins']['spec2nii2bids']

    :param session:     The full-path name of the subject/session raw data source folder
    :param bidsmap_new: The new study bidsmap that we are building (updated in place)
    :param bidsmap_old: The previous study bidsmap that has precedence over the template bidsmap
    :param template:    The template bidsmap with the default heuristics
    :param store:       The paths of the source- and target-folder. If not empty, each newly added source file is
                        copied from store['source'] to store['target'] and the copy is saved as the run provenance
    :return:            Nothing
    """

    # Get the plugin settings
    plugin = {'spec2nii2bids': bidsmap_new['Options']['plugins']['spec2nii2bids']}

    # Update the bidsmap with the info from the source files
    for sourcefile in [file for file in session.rglob('*') if is_sourcefile(file)]:

        datasource = bids.DataSource(sourcefile, plugin)
        dataformat = datasource.dataformat

        # Input checks. NB: Use .get() because the dataformat section may be absent from either bidsmap
        if not template.get(dataformat) and not bidsmap_old.get(dataformat):
            LOGGER.error(f"No {dataformat} source information found in the bidsmap and template for: {sourcefile}")
            return

        # See if we can find a matching run in the old bidsmap
        run, match = bids.get_matching_run(datasource, bidsmap_old)

        # If not, see if we can find a matching run in the template
        if not match:
            run, _ = bids.get_matching_run(datasource, template)

        # See if we have collected the run somewhere in our new bidsmap
        if not bids.exist_run(bidsmap_new, '', run):

            # Communicate with the user if the run was not present in bidsmap_old or in template, i.e. that we found a new sample
            if not match:
                LOGGER.info(f"Discovered '{datasource.datatype}' {dataformat} sample: {sourcefile}")

            # Now work from the provenance store
            if store:
                targetfile = store['target'] / sourcefile.relative_to(store['source'])
                targetfile.parent.mkdir(parents=True, exist_ok=True)
                run['provenance'] = str(shutil.copy2(sourcefile, targetfile))

            # Copy the filled-in run over to the new bidsmap
            bids.append_run(bidsmap_new, run)

        else:
            # Communicate with the user if the run was already present in bidsmap_old or in template
            LOGGER.debug(f"Known '{datasource.datatype}' {dataformat} sample: {sourcefile}")
def bidscoiner_plugin(session: Path, bidsmap: dict, bidsses: Path) -> None:
    """
    This wrapper function around spec2nii converts the MRS data in the session folder and saves it in the bidsfolder.
    Each saved datafile should be accompanied with a json sidecar file. The bidsmap options for this plugin can be found in:

    bidsmap_new['Options']['plugins']['spec2nii2bids']

    Besides converting the data, the function registers the acquisition times in the scans.tsv file and stores the
    personal data from the source header (of the last processed source file) in the participants.tsv file

    :param session: The full-path name of the subject/session raw data source folder
    :param bidsmap: The full mapping heuristics from the bidsmap YAML-file
    :param bidsses: The full-path name of the BIDS output `sub-/ses-` folder
    :return:        Nothing
    """

    # Get the subject identifiers and the BIDS root folder from the bidsses folder
    if bidsses.name.startswith('ses-'):
        bidsfolder = bidsses.parent.parent
        subid = bidsses.parent.name
        sesid = bidsses.name
    else:
        bidsfolder = bidsses.parent
        subid = bidsses.name
        sesid = ''

    # Get started and see what dataformat we have
    options = bidsmap['Options']['plugins']['spec2nii2bids']
    datasource = bids.get_datasource(session, {'spec2nii2bids': options})
    dataformat = datasource.dataformat
    sourcefiles = [file for file in session.rglob('*') if is_sourcefile(file)]
    if not sourcefiles:
        LOGGER.info(f"No {__name__} sourcedata found in: {session}")
        return

    # Read or create a scans_table and tsv-file
    scans_tsv = bidsses / f"{subid}{bids.add_prefix('_',sesid)}_scans.tsv"
    if scans_tsv.is_file():
        scans_table = pd.read_csv(scans_tsv, sep='\t', index_col='filename')
    else:
        scans_table = pd.DataFrame(columns=['acq_time'], dtype='str')
        scans_table.index.name = 'filename'

    # Loop over all MRS source data files and convert them to BIDS
    for sourcefile in sourcefiles:

        # Get a data source, a matching run from the bidsmap
        datasource = bids.DataSource(sourcefile, {'spec2nii2bids': options})
        run, index = bids.get_matching_run(datasource, bidsmap, runtime=True)

        # Check if we should ignore this run
        if datasource.datatype in bidsmap['Options']['bidscoin']['ignoretypes']:
            LOGGER.info(f"Leaving out: {sourcefile}")
            continue

        # Check that we know this run
        # NOTE(review): other plugins in this file test the second return value with `if not match` -- confirm that
        # get_matching_run() still returns None (and not False) for unknown runs
        if index is None:
            LOGGER.error(f"Skipping unknown '{datasource.datatype}' run: {sourcefile}\n-> Re-run the bidsmapper and delete the MRS output data in {bidsses} to solve this warning")
            continue

        LOGGER.info(f"Processing: {sourcefile}")

        # Create the BIDS session/datatype output folder
        outfolder = bidsses / datasource.datatype
        outfolder.mkdir(parents=True, exist_ok=True)

        # Compose the BIDS filename using the matched run
        bidsname = bids.get_bidsname(subid, sesid, run, runtime=True)
        runindex = run['bids'].get('run', '')
        if runindex.startswith('<<') and runindex.endswith('>>'):
            bidsname = bids.increment_runindex(outfolder, bidsname)
        jsonfile = (outfolder / bidsname).with_suffix('.json')

        # Check if file already exists (-> e.g. when a static runindex is used)
        if jsonfile.is_file():
            LOGGER.warning(f"{outfolder/bidsname}.* already exists and will be deleted -- check your results carefully!")
            for ext in ('.nii.gz', '.nii', '.json', '.bval', '.bvec', '.tsv.gz'):
                (outfolder / bidsname).with_suffix(ext).unlink(missing_ok=True)

        # Run spec2nii to convert the source-files in the run folder to nifti's in the BIDS-folder
        arg = ''
        args = options.get('args', OPTIONS['args'])
        if args is None:
            args = ''
        if dataformat == 'SPAR':
            dformat = 'philips'
            arg = f'"{sourcefile.with_suffix(".SDAT")}"'
        elif dataformat == 'Twix':
            dformat = 'twix'
            arg = '-e image'
        elif dataformat == 'Pfile':
            dformat = 'ge'
        else:
            # There is no spec2nii format for this data, so skip the file instead of running into an unbound `dformat`
            LOGGER.error(f"Unsupported dataformat: {dataformat}")
            continue
        command = options.get("command", "spec2nii")
        if not bidscoin.run_command(f'{command} {dformat} -j -f "{bidsname}" -o "{outfolder}" {args} {arg} "{sourcefile}"'):
            # Only give up if the failed command did not produce a nifti output file
            if not list(outfolder.glob(f"{bidsname}.nii*")):
                continue

        # Load and adapt the newly produced json sidecar-file (NB: assumes every nifti-file comes with a json-file)
        with jsonfile.open('r') as json_fid:
            jsondata = json.load(json_fid)

        # Copy over the source meta-data
        metadata = bids.copymetadata(sourcefile, outfolder / bidsname, options.get('meta', []))
        for metakey, metaval in metadata.items():
            # Only warn if an existing value is really going to be changed
            if metakey in jsondata and jsondata[metakey] != metaval:
                LOGGER.warning(f"Replacing {metakey} values in {jsonfile}: {jsondata[metakey]} -> {metaval}")
            jsondata[metakey] = metaval

        # Add all the meta data to the json-file
        for metakey, metaval in run['meta'].items():
            metaval = datasource.dynamicvalue(metaval, cleanup=False, runtime=True)
            try:
                metaval = ast.literal_eval(str(metaval))    # Convert stringified lists, numbers, etc back to Python objects
            except (ValueError, SyntaxError):
                pass                                        # Not a Python literal, i.e. keep the value as it is
            LOGGER.info(f"Adding '{metakey}: {metaval}' to: {jsonfile}")
            if not metaval:
                metaval = None
            jsondata[metakey] = metaval

        # Save the meta data to disk
        with jsonfile.open('w') as json_fid:
            json.dump(jsondata, json_fid, indent=4)

        # Parse the acquisition time from the source header or else from the json file (NB: assuming the source file represents the first acquisition)
        if datasource.datatype not in bidsmap['Options']['bidscoin']['bidsignore'] and run['bids']['suffix'] not in bids.get_derivatives(datasource.datatype):
            acq_time = ''
            if dataformat == 'SPAR':
                acq_time = datasource.attributes('scan_date')
            elif dataformat == 'Twix':
                acq_time = f"{datasource.attributes('AcquisitionDate')}T{datasource.attributes('AcquisitionTime')}"
            elif dataformat == 'Pfile':
                acq_time = f"{datasource.attributes('rhr_rh_scan_date')}T{datasource.attributes('rhr_rh_scan_time')}"
            if not acq_time or acq_time == 'T':
                acq_time = f"1925-01-01T{jsondata.get('AcquisitionTime','')}"
            try:
                acq_time = dateutil.parser.parse(acq_time)
                if options.get('anon', OPTIONS['anon']) in ('y', 'yes'):
                    acq_time = acq_time.replace(year=1925, month=1, day=1)  # Privacy protection (see BIDS specification)
                acq_time = acq_time.isoformat()
            except Exception as jsonerror:
                LOGGER.warning(f"Could not parse the acquisition time from: {sourcefile}\n{jsonerror}")
                acq_time = 'n/a'
            scans_table.loc[jsonfile.with_suffix('.nii.gz').relative_to(bidsses).as_posix(), 'acq_time'] = acq_time

    # Write the scans_table to disk
    LOGGER.info(f"Writing acquisition time data to: {scans_tsv}")
    scans_table.sort_values(by=['acq_time', 'filename'], inplace=True)
    scans_table.replace('', 'n/a').to_csv(scans_tsv, sep='\t', encoding='utf-8', na_rep='n/a')

    # Collect personal data from a source header (NB: `datasource` is the one of the last visited source file)
    personals = {}
    if sesid:
        personals['session_id'] = sesid
    age = ''
    if dataformat == 'Twix':
        personals['sex'] = datasource.attributes('PatientSex')
        personals['size'] = datasource.attributes('PatientSize')
        personals['weight'] = datasource.attributes('PatientWeight')
        age = datasource.attributes('PatientAge')   # A string of characters with one of the following formats: nnnD, nnnW, nnnM, nnnY
    elif dataformat == 'Pfile':
        sex = datasource.attributes('rhe_patsex')
        if sex == '0':
            personals['sex'] = 'O'
        elif sex == '1':
            personals['sex'] = 'M'
        elif sex == '2':
            personals['sex'] = 'F'
        age = dateutil.parser.parse(datasource.attributes('rhr_rh_scan_date')) - dateutil.parser.parse(datasource.attributes('rhe_dateofbirth'))
        age = str(age.days) + 'D'

    # Convert the age to years
    if age.endswith('D'):
        age = float(age.rstrip('D')) / 365.2524
    elif age.endswith('W'):
        age = float(age.rstrip('W')) / 52.1775
    elif age.endswith('M'):
        age = float(age.rstrip('M')) / 12
    elif age.endswith('Y'):
        age = float(age.rstrip('Y'))
    if age and options.get('anon', OPTIONS['anon']) in ('y', 'yes'):
        age = int(float(age))                       # Round down to whole years when anonymizing
    personals['age'] = str(age)

    # Store the collected personals in the participants_table
    participants_tsv = bidsfolder / 'participants.tsv'
    if participants_tsv.is_file():
        participants_table = pd.read_csv(participants_tsv, sep='\t', dtype=str)
        participants_table.set_index(['participant_id'], verify_integrity=True, inplace=True)
    else:
        participants_table = pd.DataFrame()
        participants_table.index.name = 'participant_id'
    if subid in participants_table.index and 'session_id' in participants_table.keys() and participants_table.loc[subid, 'session_id']:
        return                                      # Only take data from the first session -> BIDS specification
    for key in personals:                           # TODO: Check that only values that are consistent over sessions go in the participants.tsv file, otherwise put them in a sessions.tsv file
        if key not in participants_table or participants_table[key].isnull().get(subid, True) or participants_table[key].get(subid) == 'n/a':
            participants_table.loc[subid, key] = personals[key]

    # Write the collected data to the participants tsv-file
    LOGGER.info(f"Writing {subid} subject data to: {participants_tsv}")
    participants_table.replace('', 'n/a').to_csv(participants_tsv, sep='\t', encoding='utf-8', na_rep='n/a')
def build_dicommap(runfolder: Path, bidsmap_new: dict, bidsmap_old: dict, template: dict, gui: object) -> dict:
    """
    Maps the dicom-attributes (fields/tags) of a series-folder onto bids-labels and adds the resulting run
    to the bidsmap that is being built

    :param runfolder:   The full-path name of the series-folder containing source dicom-files
    :param bidsmap_new: The bidsmap that we are building
    :param bidsmap_old: Full BIDS heuristics data structure, with all options, BIDS labels and attributes, etc
    :param template:    The bidsmap template with the default heuristics
    :param gui:         The edit dialog is only opened if gui is not None, gui.interactive == 2 and the run
                        was not found in bidsmap_old (gui.subprefix and gui.sesprefix are passed to the dialog)
    :return:            The bidsmap with new entries in it
    """

    # Bail out if there is no dicom-file or if there are no DICOM heuristics to work with
    sample = bids.get_dicomfile(runfolder)
    if not (sample.name and (template['DICOM'] or bidsmap_old['DICOM'])):
        LOGGER.info('No DICOM information found in the bidsmap and template')
        return bidsmap_new

    # The old bidsmap has precedence, the template is the fallback
    run, modality, index = bids.get_matching_run(sample, bidsmap_old)
    from_template = index is None
    if from_template:
        run, modality, _ = bids.get_matching_run(sample, template)

    # Nothing to do if the run was already collected in our new bidsmap
    if bids.exist_run(bidsmap_new, 'DICOM', '', run):
        return bidsmap_new

    # Copy the filled-in run over to the new bidsmap and tell the user about it
    bidsmap_new = bids.append_run(bidsmap_new, 'DICOM', modality, run)
    LOGGER.info(f"New '{modality}' sample found: {sample}")

    # Only ask the user for help if the new run comes from the template (i.e. was not yet in the old bidsmap)
    if not (gui and gui.interactive == 2 and from_template):
        return bidsmap_new

    # Open the interactive edit window to get the new mapping
    editor = bidseditor.EditDialog(sample, modality, bidsmap_new, template, gui.subprefix, gui.sesprefix)
    editor.exec()

    # Get the result
    if editor.result() == 1:                # The user has finished the edit
        bidsmap_new = editor.target_bidsmap
    elif editor.result() in (0, 2):         # The user has canceled / aborted the edit
        reply = QMessageBox.question(None, 'BIDSmapper', 'Do you want to abort and quit the bidsmapper?',
                                     QMessageBox.Yes | QMessageBox.No, QMessageBox.No)
        if reply == QMessageBox.Yes:        # Any other answer just continues the mapping
            LOGGER.info('User has quit the bidsmapper')
            sys.exit()
    else:
        LOGGER.debug(f'Unexpected result {editor.result()} from the edit dialog')

    return bidsmap_new
def bidscoiner_plugin(session: Path, bidsmap: dict, bidsses: Path) -> None:
    """
    This wrapper function around phys2bids converts the physio data in the session folder and saves it in the
    bidsfolder. Each saved datafile should be accompanied with a json sidecar file. The bidsmap options for this
    plugin can be found in:

    bidsmap_new['Options']['plugins']['phys2bidscoin']

    See also the dcm2niix2bids plugin for reference implementation

    For every known source file the repetition times of the `func/` scans listed in the scans.tsv file are read
    from their json sidecar-files, a one-off phys2bids heuristic is generated from the BIDS labels of the matched
    run, phys2bids is run and the meta-data of the run is added to the json sidecar-files of the output

    :param session: The full-path name of the subject/session raw data source folder
    :param bidsmap: The full mapping heuristics from the bidsmap YAML-file
    :param bidsses: The full-path name of the BIDS output `sub-/ses-` folder
    :return:        Nothing
    """

    # Get the subject identifiers and the BIDS root folder from the bidsses folder
    if bidsses.name.startswith('ses-'):
        bidsfolder = bidsses.parent.parent
        subid = bidsses.parent.name
        sesid = bidsses.name
    else:
        bidsfolder = bidsses.parent
        subid = bidsses.name
        sesid = ''

    # Get started
    plugin = {'phys2bidscoin': bidsmap['Options']['plugins']['phys2bidscoin']}
    datasource = bids.get_datasource(session, plugin)
    sourcefiles = [file for file in session.rglob('*') if is_sourcefile(file)]
    if not sourcefiles:
        LOGGER.info(f"No {__name__} sourcedata found in: {session}")
        return

    # Loop over all source data files and convert them to BIDS
    for sourcefile in sourcefiles:

        # Get a data source, a matching run from the bidsmap
        datasource = bids.DataSource(sourcefile, plugin, datasource.dataformat)
        run, match = bids.get_matching_run(datasource, bidsmap, runtime=True)

        # Check if we should ignore this run
        if datasource.datatype in bidsmap['Options']['bidscoin']['ignoretypes']:
            LOGGER.info(f"Leaving out: {sourcefile}")
            continue

        # Check that we know this run
        if not match:
            LOGGER.error(f"Skipping unknown '{datasource.datatype}' run: {sourcefile}\n-> Re-run the bidsmapper and delete the physiological output data in {bidsses} to solve this warning")
            continue

        LOGGER.info(f"Processing: {sourcefile}")

        # Get an ordered list of the func runs from the scans.tsv file (which should have a standardized datetime format)
        scans_tsv = bidsses / f"{subid}{bids.add_prefix('_', sesid)}_scans.tsv"
        if not scans_tsv.is_file():
            LOGGER.error(f"Could not read the TR's for phys2bids due to a missing '{scans_tsv}' file")
            continue
        scans_table = pd.read_csv(scans_tsv, sep='\t', index_col='filename')
        scans_table.sort_values(by=['acq_time', 'filename'], inplace=True)
        funcscans = [scan for scan in scans_table.index if scan.startswith('func/')]

        # Then read the TR's from the associated func sidecar files. NB: Strip the extension first, a single
        # `with_suffix('.json')` would turn `*_bold.nii.gz` into `*_bold.nii.json`
        tr = []
        for funcscan in funcscans:
            sidecar = (bidsses / funcscan).with_suffix('').with_suffix('.json')
            with sidecar.open('r') as json_fid:
                tr.append(json.load(json_fid)['RepetitionTime'])

        # Create a heuristic function for phys2bids (the indentation in the strings is part of the generated code)
        heur_lines = ['def heur(physinfo, run=""):',
                      '    info = {}',
                      f'    if physinfo == "{sourcefile.name}":']
        heur_lines += [f'        info["{key}"] = "{val}"' for key, val in run['bids'].items()]
        if not run['bids']:
            heur_lines.append('        pass')       # An `if` without a body is not valid python
        heur_lines.append('    return info')
        heur_str = '\n'.join(heur_lines)

        # Write the heuristic function as a file in a temporary folder that is removed again once phys2bids is done
        with tempfile.TemporaryDirectory() as tmpdir:
            heur_file = Path(tmpdir) / f'heuristic_sub-{subid}_ses-{sesid}.py'
            heur_file.write_text(heur_str)

            # Run phys2bids
            physiofiles = phys2bids(filename=str(sourcefile),
                                    outdir=str(bidsfolder),
                                    heur_file=str(heur_file),
                                    sub=subid,
                                    ses=sesid,
                                    chtrig=int(run['meta'].get('TriggerChannel', 0)),
                                    num_timepoints_expected=run['meta'].get('ExpectedTimepoints', None),
                                    tr=tr,
                                    pad=run['meta'].get('Pad', 9),
                                    ch_name=run['meta'].get('ChannelNames', []),
                                    yml='',
                                    debug=True,
                                    quiet=False)

        # Add user-specified meta-data to the newly produced json files (NB: assumes every physio-file comes with a json-file)
        for physiofile in physiofiles:
            # NOTE(review): assumes phys2bids returns the data file paths; the extension (possibly a double one
            # such as `.tsv.gz`) is stripped to get the sidecar name -- confirm against the phys2bids version used
            jsonfile = Path(physiofile).with_suffix('').with_suffix('.json')
            if not jsonfile.is_file():
                LOGGER.error(f"Could not find the expected json sidecar-file: '{jsonfile}'")
                continue
            with jsonfile.open('r') as json_fid:
                jsondata = json.load(json_fid)
            for metakey, metaval in run['meta'].items():
                metaval = datasource.dynamicvalue(metaval, cleanup=False, runtime=True)
                try:
                    # E.g. convert a stringified list or int back to a list or int
                    metaval = ast.literal_eval(str(metaval))
                except (ValueError, SyntaxError):
                    pass                            # Deliberate best-effort: keep the value as it is
                LOGGER.info(f"Adding '{metakey}: {metaval}' to: {jsonfile}")
                if not metaval:
                    metaval = None
                jsondata[metakey] = metaval
            with jsonfile.open('w') as json_fid:
                json.dump(jsondata, json_fid, indent=4)
def build_bidsmap(dataformat: str, sourcefile: Path, bidsmap_new: dict, bidsmap_old: dict, template: dict, store: dict, gui: object) -> dict:
    """
    Map the attributes of one source file onto BIDS labels and collect the resulting run in the new bidsmap

    The old bidsmap is searched first for a matching run; the template is only consulted if the old bidsmap
    has no match. A run that is not yet present in the new bidsmap is appended to it, after the source file
    has been copied to the provenance store (if there is one)

    :param dataformat:  The information source in the bidsmap that is used, e.g. 'DICOM'
    :param sourcefile:  The full-path name of the source file
    :param bidsmap_new: The bidsmap that we are building
    :param bidsmap_old: Full BIDS heuristics data structure, with all options, BIDS labels and attributes, etc
    :param template:    The bidsmap template with the default heuristics
    :param store:       The paths of the source- and target-folder
    :param gui:         If truthy and `gui.interactive == 2`, the edit dialog is opened for runs that were matched via the template (i.e. not found in the old bidsmap)
    :return:            The bidsmap with new entries in it
    """

    # Nothing to do without a source file or without any heuristics for this dataformat
    if not (sourcefile.name and (template[dataformat] or bidsmap_old[dataformat])):
        LOGGER.info(f"No {dataformat} source information found in the bidsmap and template")
        return bidsmap_new

    # The old bidsmap has precedence; fall back on the template if it has no matching run
    run, modality, index = bids.get_matching_run(sourcefile, bidsmap_old, dataformat)
    from_template = index is None
    if from_template:
        run, modality, _ = bids.get_matching_run(sourcefile, template, dataformat)

    # Runs that we have already collected need no further action
    if bids.exist_run(bidsmap_new, dataformat, '', run):
        return bidsmap_new

    # Copy the source file to the provenance store and continue working with the copy
    if store:
        storedfile = store['target'] / sourcefile.relative_to(store['source'])
        storedfile.parent.mkdir(parents=True, exist_ok=True)
        sourcefile = Path(shutil.copy2(sourcefile, storedfile))
        run['provenance'] = str(sourcefile.resolve())

    # Tell the user that we found a new sample and store the filled-in run in the new bidsmap
    LOGGER.info(f"Found '{modality}' {dataformat} sample: {sourcefile}")
    bidsmap_new = bids.append_run(bidsmap_new, dataformat, modality, run)

    # Only ask the user for help in interactive mode and only for runs that came from the template
    if not (gui and gui.interactive == 2 and from_template):
        return bidsmap_new

    # Open the interactive edit window to get the new mapping
    editor = bidseditor.EditDialog(dataformat, sourcefile, modality, bidsmap_new, template, gui.subprefix, gui.sesprefix)
    editor.exec()

    outcome = editor.result()
    if outcome == 1:
        # The user has finished the edit
        bidsmap_new = editor.target_bidsmap
    elif outcome in (0, 2):
        # The user has canceled / aborted the edit
        answer = QMessageBox.question(None, 'BIDSmapper', 'Do you want to abort and quit the bidsmapper?',
                                      QMessageBox.Yes | QMessageBox.No, QMessageBox.No)
        if answer == QMessageBox.Yes:
            LOGGER.info('User has quit the bidsmapper')
            sys.exit()
    else:
        LOGGER.debug(f'Unexpected result {outcome} from the edit dialog')

    return bidsmap_new
def bidsmapper_plugin(session: Path, bidsmap_new: dict, bidsmap_old: dict, template: dict, store: dict) -> None:
    """
    Collect the runs of all nibabel2bids source files in the session in the new study bidsmap

    For every source file the old bidsmap is searched first for a matching run, then the template. Runs that
    are not yet present in the new bidsmap are appended to it (in-place)

    :param session:     The full-path name of the subject/session raw data source folder
    :param bidsmap_new: The new study bidsmap that we are building
    :param bidsmap_old: The previous study bidsmap that has precedence over the template bidsmap
    :param template:    The template bidsmap with the default heuristics
    :param store:       The paths of the source- and target-folder
    :return:            Nothing
    """

    # Get started
    plugin = {'nibabel2bids': bidsmap_new['Options']['plugins']['nibabel2bids']}
    datasource = bids.get_datasource(session, plugin, recurse=2)
    dataformat = datasource.dataformat
    if not dataformat:
        return
    if not template[dataformat] and not bidsmap_old[dataformat]:
        LOGGER.error(f"No {dataformat} source information found in the bidsmap and template")
        return

    # Collect the source files of the session up front, i.e. before anything is copied to the provenance store
    candidates = [path for path in session.rglob('*') if is_sourcefile(path)]
    for sourcefile in candidates:

        # The old bidsmap has precedence; fall back on the template if it has no matching run
        datasource = bids.DataSource(sourcefile, plugin, datasource.dataformat)
        run, match = bids.get_matching_run(datasource, bidsmap_old)
        if not match:
            run, _ = bids.get_matching_run(datasource, template)

        # Runs that we have already collected somewhere in our new bidsmap only get a debug message
        if bids.exist_run(bidsmap_new, '', run):
            LOGGER.debug(f"Known '{datasource.datatype}' {datasource.dataformat} sample: {sourcefile}")
            continue

        # Tell the user that we found a new sample if the run was not present in bidsmap_old
        if not match:
            LOGGER.info(f"Discovered '{datasource.datatype}' {datasource.dataformat} sample: {sourcefile}")

        # Copy the source file to the provenance store and refer to the copy
        if store:
            storedfile = store['target'] / sourcefile.relative_to(store['source'])
            storedfile.parent.mkdir(parents=True, exist_ok=True)
            run['provenance'] = str(shutil.copy2(sourcefile, storedfile))

        # Store the filled-in run in the new bidsmap
        bids.append_run(bidsmap_new, run)
def bidscoiner_plugin(session: Path, bidsmap: dict, bidsses: Path) -> None:
    """
    The bidscoiner plugin to convert the session Nibabel source-files into BIDS-valid nifti-files in the
    corresponding bids session-folder

    Every known source file is saved with nibabel under its BIDS name, gets a json sidecar-file with the
    copied and user-specified meta-data and gets an entry in the scans.tsv file. Finally, the subject is
    added to the participants.tsv file (if it does not already have a session entry there)

    :param session: The full-path name of the subject/session source folder
    :param bidsmap: The full mapping heuristics from the bidsmap YAML-file
    :param bidsses: The full-path name of the BIDS output `sub-/ses-` folder
    :return:        Nothing
    """

    # Get the subject identifiers and the BIDS root folder from the bidsses folder
    if bidsses.name.startswith('ses-'):
        bidsfolder = bidsses.parent.parent
        subid = bidsses.parent.name
        sesid = bidsses.name
    else:
        bidsfolder = bidsses.parent
        subid = bidsses.name
        sesid = ''

    # Get started
    options = bidsmap['Options']['plugins']['nibabel2bids']
    ext = options.get('ext', OPTIONS['ext'])
    meta = options.get('meta', [])
    sourcefiles = [file for file in session.rglob('*') if is_sourcefile(file)]
    if not sourcefiles:
        LOGGER.info(f"No {__name__} sourcedata found in: {session}")
        return

    # Read or create a scans_table and tsv-file
    scans_tsv = bidsses / f"{subid}{bids.add_prefix('_', sesid)}_scans.tsv"
    if scans_tsv.is_file():
        scans_table = pd.read_csv(scans_tsv, sep='\t', index_col='filename')
    else:
        scans_table = pd.DataFrame(columns=['acq_time'], dtype='str')
        scans_table.index.name = 'filename'

    # Collect the different Nibabel source files for all files in the session
    for sourcefile in sourcefiles:

        datasource = bids.DataSource(sourcefile, {'nibabel2bids': options})
        run, match = bids.get_matching_run(datasource, bidsmap, runtime=True)

        # Check if we should ignore this run
        if datasource.datatype in bidsmap['Options']['bidscoin']['ignoretypes']:
            LOGGER.info(f"Leaving out: {sourcefile}")
            continue

        # Check if we already know this run
        if not match:
            LOGGER.error(f"Skipping unknown '{datasource.datatype}' run: {sourcefile}\n-> Re-run the bidsmapper and delete {bidsses} to solve this warning")
            continue

        LOGGER.info(f"Processing: {sourcefile}")

        # Create the BIDS session/datatype output folder
        outfolder = bidsses / datasource.datatype
        outfolder.mkdir(parents=True, exist_ok=True)

        # Compose the BIDS filename using the matched run
        bidsname = bids.get_bidsname(subid, sesid, run, runtime=True)
        runindex = run['bids'].get('run', '')
        if runindex.startswith('<<') and runindex.endswith('>>'):
            bidsname = bids.increment_runindex(outfolder, bidsname)
        bidsfile = (outfolder / bidsname).with_suffix(ext)

        # Check if file already exists (-> e.g. when a static runindex is used)
        if bidsfile.is_file():
            LOGGER.warning(f"{bidsfile}.* already exists and will be deleted -- check your results carefully!")
            bidsfile.unlink()                       # Remove exactly the file that we have just checked

        # Save the sourcefile as a BIDS nifti file
        nib.save(nib.load(sourcefile), bidsfile)

        # Copy over the source meta-data. NB: First strip the (possibly double, e.g. `.nii.gz`) extension
        jsonfile = bidsfile.with_suffix('').with_suffix('.json')
        jsondata = bids.copymetadata(sourcefile, bidsfile, meta)

        # Add all the meta data to the meta-data. NB: the dynamic `IntendedFor` value is handled separately later
        for metakey, metaval in run['meta'].items():
            if metakey != 'IntendedFor':
                metaval = datasource.dynamicvalue(metaval, cleanup=False, runtime=True)
                try:
                    # E.g. convert a stringified list or int back to a list or int
                    metaval = ast.literal_eval(str(metaval))
                except (ValueError, SyntaxError):
                    pass                            # Deliberate best-effort: keep the value as it is
                LOGGER.info(f"Adding '{metakey}: {metaval}' to: {jsonfile}")
            if not metaval:
                metaval = None
            jsondata[metakey] = metaval

        # Remove unused (but added from the template) B0FieldIdentifiers/Sources
        if not jsondata.get('B0FieldSource'):
            jsondata.pop('B0FieldSource', None)
        if not jsondata.get('B0FieldIdentifier'):
            jsondata.pop('B0FieldIdentifier', None)

        # Save the meta-data to the json sidecar-file
        with jsonfile.open('w') as json_fid:
            json.dump(jsondata, json_fid, indent=4)

        # Add an entry to the scans_table (we typically don't have useful data to put there). NB: Use `or ''`
        # because the key can be present with a None value (see above), which would yield an unparsable
        # "1925-01-01TNone" string
        acq_time = dateutil.parser.parse(f"1925-01-01T{jsondata.get('AcquisitionTime') or ''}")
        scans_table.loc[bidsfile.relative_to(bidsses).as_posix(), 'acq_time'] = acq_time.isoformat()

    # Write the scans_table to disk
    LOGGER.info(f"Writing data to: {scans_tsv}")
    scans_table.replace('', 'n/a').to_csv(scans_tsv, sep='\t', encoding='utf-8', na_rep='n/a')

    # Add an (empty) entry to the participants_table (we don't have useful data to put there)
    participants_tsv = bidsfolder / 'participants.tsv'
    if participants_tsv.is_file():
        participants_table = pd.read_csv(participants_tsv, sep='\t', dtype=str)
        participants_table.set_index(['participant_id'], verify_integrity=True, inplace=True)
    else:
        participants_table = pd.DataFrame()
        participants_table.index.name = 'participant_id'
    if subid in participants_table.index and 'session_id' in participants_table.keys() and participants_table.loc[subid, 'session_id']:
        return                                      # Only take data from the first session -> BIDS specification
    participants_table.loc[subid, 'session_id'] = sesid if sesid else None

    # Write the collected data to the participants tsv-file
    LOGGER.info(f"Writing {subid} subject data to: {participants_tsv}")
    participants_table.replace('', 'n/a').to_csv(participants_tsv, sep='\t', encoding='utf-8', na_rep='n/a')