def bidsmapper_plugin(session: Path, bidsmap_new: dict, bidsmap_old: dict, template: dict, store: dict) -> None:
    """
    All the logic to map the DICOM/PAR source fields onto bids labels go into this function

    One source file per run is matched against the old study bidsmap and, if that gives no match, against the
    template bidsmap. Runs that do not yet exist in `bidsmap_new` are appended to it (i.e. it is updated in place)

    :param session:     The full-path name of the subject/session raw data source folder
    :param bidsmap_new: The new study bidsmap that we are building (updated in place)
    :param bidsmap_old: The previous study bidsmap that has precedence over the template bidsmap
    :param template:    The template bidsmap with the default heuristics
    :param store:       The paths of the source- and target-folder, read as store['source'] and store['target']. If
                        empty, the source files are not copied and the run provenance is left untouched
    :return:            Nothing
    """

    # Get started
    plugin     = {'dcm2niix2bids': bidsmap_new['Options']['plugins']['dcm2niix2bids']}
    datasource = bids.get_datasource(session, plugin)
    dataformat = datasource.dataformat
    if not dataformat:
        return

    # Collect the different DICOM/PAR source files for all runs in the session
    sourcefiles = []
    if dataformat == 'DICOM':
        for sourcedir in bidscoin.lsdirs(session):
            for n in range(1):      # Option: Use range(2) to scan two files and catch e.g. magnitude1/2 fieldmap files that are stored in one Series folder (but bidscoiner sees only the first file anyhow and it makes bidsmapper 2x slower :-()
                sourcefile = bids.get_dicomfile(sourcedir, n)
                if sourcefile.name:
                    sourcefiles.append(sourcefile)
    elif dataformat == 'PAR':
        sourcefiles = bids.get_parfiles(session)
    else:
        # NB: There is no active exception here, so LOGGER.exception() would only append a meaningless "NoneType: None" traceback
        LOGGER.error(f"Unsupported dataformat '{dataformat}'")
        return

    # Update the bidsmap with the info from the source files
    for sourcefile in sourcefiles:

        # Input checks. NB: Use get() because a bidsmap may have no section at all for this dataformat
        if not sourcefile.name or (not template.get(dataformat) and not bidsmap_old.get(dataformat)):
            LOGGER.error(f"No {dataformat} source information found in the bidsmap and template for: {sourcefile}")
            return

        # See if we can find a matching run in the old bidsmap
        datasource = bids.DataSource(sourcefile, plugin, dataformat)
        run, match = bids.get_matching_run(datasource, bidsmap_old)

        # If not, see if we can find a matching run in the template
        if not match:
            run, _ = bids.get_matching_run(datasource, template)

        # See if we have collected the run somewhere in our new bidsmap
        if not bids.exist_run(bidsmap_new, '', run):

            # Communicate with the user if the run was not present in bidsmap_old or in template, i.e. that we found a new sample
            if not match:
                LOGGER.info(f"Discovered '{datasource.datatype}' {dataformat} sample: {sourcefile}")

            # Now work from the provenance store, i.e. copy the source file and let the run point to the copy
            if store:
                targetfile = store['target'] / sourcefile.relative_to(store['source'])
                targetfile.parent.mkdir(parents=True, exist_ok=True)
                run['provenance'] = str(shutil.copy2(sourcefile, targetfile))

            # Copy the filled-in run over to the new bidsmap
            bids.append_run(bidsmap_new, run)

        else:
            # Communicate with the user if the run was already present in bidsmap_old or in template
            LOGGER.debug(f"Known '{datasource.datatype}' {dataformat} sample: {sourcefile}")
# Ordered fieldmap rename rules: (dcm2niix-postfixed name part, BIDS name part, allowed number of postfixed files or None for any).
# NB: The order matters because later rules act on the names that were left untouched by the earlier ones
_FMAP_RENAMES = (
    ('_magnitude1a',    '_magnitude2', None),       # First catch this potential weird / rare case
    ('_magnitude1_pha', '_phase2',     None),       # First catch this potential weird / rare case
    ('_magnitude1_e1',  '_magnitude1', None),       # Case 2 = Two phase and magnitude images
    ('_magnitude1_e2',  '_magnitude2', None),       # Case 2: This can happen when the e2 image is stored in the same directory as the e1 image, but with the e2 listed first
    ('_magnitude2_e1',  '_magnitude1', None),       # Case 2: This can happen when the e2 image is stored in the same directory as the e1 image, but with the e2 listed first
    ('_magnitude2_e2',  '_magnitude2', None),       # Case 2
    ('_magnitude1_ph',  '_phasediff',  (2, 3)),     # Case 1 = One or two magnitude + one phasediff image
    ('_magnitude2_ph',  '_phasediff',  (2, 3)),     # Case 1
    ('_phasediff_e1',   '_phasediff',  None),       # Case 1
    ('_phasediff_e2',   '_phasediff',  None),       # Case 1
    ('_phasediff_ph',   '_phasediff',  None),       # Case 1
    ('_magnitude1_ph',  '_phase1',     None),       # Case 2: One or two magnitude and phase images in one folder / datasource
    ('_magnitude2_ph',  '_phase2',     None),       # Case 2: Two magnitude + two phase images in one folder / datasource
    ('_phase1_e1',      '_phase1',     None),       # Case 2
    ('_phase1_e2',      '_phase2',     None),       # Case 2: This can happen when the e2 image is stored in the same directory as the e1 image, but with the e2 listed first
    ('_phase2_e1',      '_phase1',     None),       # Case 2: This can happen when the e2 image is stored in the same directory as the e1 image, but with the e2 listed first
    ('_phase2_e2',      '_phase2',     None),       # Case 2
    ('_phase1_ph',      '_phase1',     None),       # Case 2: One or two magnitude and phase images in one folder / datasource
    ('_phase2_ph',      '_phase2',     None),       # Case 2: Two magnitude + two phase images in one folder / datasource
    ('_magnitude_e1',   '_magnitude',  None),       # Case 3 = One magnitude + one fieldmap image
    ('_fieldmap_e1',    '_magnitude',  (2,)),       # Case 3: One magnitude + one fieldmap image in one folder / datasource
    ('_fieldmap_e1',    '_fieldmap',   None),       # Case 3
    ('_magnitude_ph',   '_fieldmap',   None),       # Case 3: One magnitude + one fieldmap image in one folder / datasource
    ('_fieldmap_ph',    '_fieldmap',   None),       # Case 3
)


def _patch_fieldmap_name(bidsname: str, nrfiles: int) -> str:
    """Applies the ordered dcm2niix -> BIDS fieldmap rename rules to bidsname, given the number of postfixed dcm2niix files"""
    for old, new, counts in _FMAP_RENAMES:
        if counts is None or nrfiles in counts:
            bidsname = bidsname.replace(old, new)
    return bidsname


def _patientage_in_years(age, anonymize: bool) -> str:
    """
    Converts a DICOM PatientAge string (formats: nnnD, nnnW, nnnM, nnnY) to an age in years

    :param age:         The PatientAge value from the source header
    :param anonymize:   If True, the age is truncated to an integer number of years
    :return:            The age as a string, or '' if the age is empty, zero or cannot be parsed
    """
    divisors = {'D': 365.2524, 'W': 52.1775, 'M': 12, 'Y': 1}
    age      = str(age or '')
    try:
        unit  = age[-1:]
        years = float(age.rstrip(unit)) / divisors[unit] if unit in divisors else age
        if not years:
            return ''
        if anonymize:
            years = int(float(years))
    except ValueError:
        # A malformed header value should not abort the conversion (and thus prevent writing the participants.tsv file)
        LOGGER.warning(f"Could not parse the PatientAge value: '{age}'")
        return ''
    return str(years)


def bidscoiner_plugin(session: Path, bidsmap: dict, bidsses: Path) -> None:
    """
    The bidscoiner plugin to convert the session DICOM and PAR/REC source-files into BIDS-valid nifti-files in the
    corresponding bids session-folder and extract personals (e.g. Age, Sex) from the source header

    Per data source the matching bidsmap run is looked up, the source is converted (physio logs with the physio module,
    all other data with the configured dcm2niix command), the dcm2niix filename postfixes are translated to BIDS
    entities, the json sidecar files are enriched with the source- and run meta-data and the acquisition time is stored
    in the scans.tsv file. Finally, the personal data is stored in the participants.tsv file

    :param session:     The full-path name of the subject/session source folder
    :param bidsmap:     The full mapping heuristics from the bidsmap YAML-file
    :param bidsses:     The full-path name of the BIDS output `sub-/ses-` folder
    :return:            Nothing
    """

    # Get the subject identifiers and the BIDS root folder from the bidsses folder
    if bidsses.name.startswith('ses-'):
        bidsfolder = bidsses.parent.parent
        subid      = bidsses.parent.name
        sesid      = bidsses.name
    else:
        bidsfolder = bidsses.parent
        subid      = bidsses.name
        sesid      = ''

    # Get started and see what dataformat we have
    options    = bidsmap['Options']['plugins']['dcm2niix2bids']
    anonymize  = options.get('anon', 'y') in ('y', 'yes')
    datasource = bids.get_datasource(session, {'dcm2niix2bids': options})
    dataformat = datasource.dataformat
    if not dataformat:
        LOGGER.info(f"No {__name__} sourcedata found in: {session}")
        return

    # Make a list of all the data sources / runs
    manufacturer = 'UNKNOWN'
    sources      = []
    if dataformat == 'DICOM':
        sources      = bidscoin.lsdirs(session)
        manufacturer = datasource.attributes('Manufacturer')
    elif dataformat == 'PAR':
        sources      = bids.get_parfiles(session)
        manufacturer = 'Philips Medical Systems'
    else:
        # NB: There is no active exception here, so LOGGER.exception() would only append a meaningless "NoneType: None" traceback
        LOGGER.error(f"Unsupported dataformat '{dataformat}'")

    # Read or create a scans_table and tsv-file
    scans_tsv = bidsses / f"{subid}{bids.add_prefix('_',sesid)}_scans.tsv"
    if scans_tsv.is_file():
        scans_table = pd.read_csv(scans_tsv, sep='\t', index_col='filename')
    else:
        scans_table = pd.DataFrame(columns=['acq_time'], dtype='str')
        scans_table.index.name = 'filename'

    # Process all the source files or run subfolders
    sourcefile = Path()
    for source in sources:

        # Get a sourcefile
        if dataformat == 'DICOM':
            sourcefile = bids.get_dicomfile(source)
        elif dataformat == 'PAR':
            sourcefile = source
        if not sourcefile.name:
            continue

        # Get a matching run from the bidsmap
        datasource = bids.DataSource(sourcefile, {'dcm2niix2bids': options}, dataformat)
        run, match = bids.get_matching_run(datasource, bidsmap, runtime=True)

        # Check if we should ignore this run
        if datasource.datatype in bidsmap['Options']['bidscoin']['ignoretypes']:
            LOGGER.info(f"Leaving out: {source}")
            continue

        # Check if we already know this run
        if not match:
            LOGGER.error(f"Skipping unknown '{datasource.datatype}' run: {sourcefile}\n-> Re-run the bidsmapper and delete {bidsses} to solve this warning")
            continue

        LOGGER.info(f"Processing: {source}")

        # Create the BIDS session/datatype output folder
        if run['bids']['suffix'] in bids.get_derivatives(datasource.datatype):
            outfolder = bidsfolder / 'derivatives' / manufacturer.replace(' ', '') / subid / sesid / datasource.datatype
        else:
            outfolder = bidsses / datasource.datatype
        outfolder.mkdir(parents=True, exist_ok=True)

        # Compose the BIDS filename using the matched run
        bidsname   = bids.get_bidsname(subid, sesid, run, runtime=True)
        runindex   = str(run['bids'].get('run') or '')      # NB: The run-value can be missing, None or a (YAML-parsed) integer
        dynamicrun = runindex.startswith('<<') and runindex.endswith('>>')
        if dynamicrun:
            bidsname = bids.increment_runindex(outfolder, bidsname)
        jsonfiles = [(outfolder / bidsname).with_suffix('.json')]   # List -> Collect the associated json-files (for updating them later) -- possibly > 1

        # Check if file already exists (-> e.g. when a static runindex is used)
        if (outfolder / bidsname).with_suffix('.json').is_file():
            LOGGER.warning(f"{outfolder/bidsname}.* already exists and will be deleted -- check your results carefully!")
            for ext in ('.nii.gz', '.nii', '.json', '.tsv', '.tsv.gz', '.bval', '.bvec'):
                (outfolder / bidsname).with_suffix(ext).unlink(missing_ok=True)

        # Convert physiological log files (dcm2niix can't handle these)
        if run['bids']['suffix'] == 'physio':
            if bids.get_dicomfile(source, 2).name:          # TODO: issue warning or support PAR
                LOGGER.warning(f"Found > 1 DICOM file in {source}, using: {sourcefile}")
            physiodata = physio.readphysio(sourcefile)
            physio.physio2tsv(physiodata, outfolder / bidsname)

        # Convert the source-files in the run folder to nifti's in the BIDS-folder
        else:
            command = '{command} {args} -f "{filename}" -o "{outfolder}" "{source}"'.format(
                command   = options['command'],
                args      = options.get('args', ''),
                filename  = bidsname,
                outfolder = outfolder,
                source    = source)
            # A falsy result is treated as a failed conversion, but we only give up if no nifti-file was produced at all
            if not bidscoin.run_command(command):
                if not list(outfolder.glob(f"{bidsname}.nii*")):
                    continue
            if list(outfolder.glob(f"{bidsname}a.nii*")):
                LOGGER.warning(f"Unexpected variants of {outfolder/bidsname}* were produced by dcm2niix. Possibly this can be remedied by using the dcm2niix -i option (to ignore derived, localizer and 2D images)")

            # Replace uncropped output image with the cropped one
            if '-x y' in options.get('args', ''):
                for dcm2niixfile in sorted(outfolder.glob(bidsname + '*_Crop_*')):     # e.g. *_Crop_1.nii.gz
                    ext         = ''.join(dcm2niixfile.suffixes)
                    newbidsfile = str(dcm2niixfile).rsplit(ext, 1)[0].rsplit('_Crop_', 1)[0] + ext
                    LOGGER.info(f"Found dcm2niix _Crop_ postfix, replacing original file\n{dcm2niixfile} ->\n{newbidsfile}")
                    dcm2niixfile.replace(newbidsfile)

            # Rename all files that got additional postfixes from dcm2niix. See: https://github.com/rordenlab/dcm2niix/blob/master/FILENAMING.md
            dcm2niixpostfixes = ('_c', '_i', '_Eq', '_real', '_imaginary', '_MoCo', '_t', '_Tilt', '_e', '_ph', '_ADC', '_fieldmaphz')
            dcm2niixfiles     = sorted({dcm2niixfile for dcm2niixpostfix in dcm2niixpostfixes
                                        for dcm2niixfile in outfolder.glob(f"{bidsname}*{dcm2niixpostfix}*.nii*")})
            if not jsonfiles[0].is_file() and dcm2niixfiles:    # Possibly renamed by dcm2niix, e.g. with multi-echo data (but not always for the first echo)
                jsonfiles.pop(0)
            partlabels = {'ph': 'phase', 'real': 'real', 'imaginary': 'imag'}       # dcm2niix postfix -> BIDS part-label
            for dcm2niixfile in dcm2niixfiles:
                ext         = ''.join(dcm2niixfile.suffixes)
                postfixes   = str(dcm2niixfile).split(bidsname)[1].rsplit(ext)[0].split('_')[1:]
                newbidsname = dcm2niixfile.name         # Strip the additional postfixes and assign them to bids entities in the for-loop below
                for postfix in postfixes:               # dcm2niix postfixes _c%d, _e%d and _ph (and any combination of these in that order) are for multi-coil data, multi-echo data and phase data

                    # Patch the echo entity in the newbidsname with the dcm2niix echo info
                    # NB: We can't rely on the bids-entity info here because manufacturers can e.g. put multiple echos in one series / run-folder
                    if 'echo' in run['bids'] and postfix.startswith('e'):
                        echonr = f"_{postfix}".replace('_e', '')        # E.g. postfix='e1'
                        if not echonr:
                            echonr = '1'
                        if echonr.isnumeric():
                            newbidsname = bids.insert_bidskeyval(newbidsname, 'echo', echonr.lstrip('0'))   # In contrast to other labels, run and echo labels MUST be integers. Those labels MAY include zero padding, but this is NOT RECOMMENDED to maintain their uniqueness
                        else:
                            LOGGER.error(f"Unexpected postix '{postfix}' found in {dcm2niixfile}")
                            newbidsname = bids.get_bidsvalue(newbidsname, 'dummy', postfix)                 # Append the unknown postfix to the acq-label

                    # Patch the phase entity in the newbidsname with the dcm2niix mag/phase info
                    elif 'part' in run['bids'] and postfix in partlabels:       # e.g. part: ['', 'mag', 'phase', 'real', 'imag', 0]
                        newbidsname = bids.insert_bidskeyval(newbidsname, 'part', partlabels[postfix])

                    # Patch fieldmap images (NB: datatype=='fmap' is too broad, see the fmap.yaml file)
                    elif run['bids']['suffix'] in bids.bidsdatatypes['fmap'][0]['suffixes']:    # i.e. in ('magnitude','magnitude1','magnitude2','phase1','phase2','phasediff','fieldmap'). TODO: Make this robust for future BIDS versions
                        if len(dcm2niixfiles) not in (1, 2, 3, 4):      # Phase / echo data may be stored in the same data source / run folder
                            LOGGER.debug(f"Unknown fieldmap {outfolder/bidsname} for '{postfix}'")
                        newbidsname = _patch_fieldmap_name(newbidsname, len(dcm2niixfiles))

                    # Append the dcm2niix info to acq-label, may need to be improved / elaborated for future BIDS standards, supporting multi-coil data
                    else:
                        newbidsname = bids.get_bidsvalue(newbidsname, 'dummy', postfix)

                    # Remove the added postfix from the new bidsname
                    newbidsname = newbidsname.replace(f"_{postfix}_", '_')      # If it is not last
                    newbidsname = newbidsname.replace(f"_{postfix}.", '.')      # If it is last

                    # The ADC images are not BIDS compliant
                    if postfix == 'ADC':
                        LOGGER.warning(f"The {newbidsname} image is most likely not BIDS-compliant -- you can probably delete it safely and update the scants.tsv file")

                # Save the nifti file with a new name
                if dynamicrun:
                    newbidsname = bids.increment_runindex(outfolder, newbidsname, '')   # Update the runindex now that the acq-label has changed
                newbidsfile = outfolder / newbidsname
                LOGGER.info(f"Found dcm2niix {postfixes} postfixes, renaming\n{dcm2niixfile} ->\n{newbidsfile}")
                if newbidsfile.is_file():
                    LOGGER.warning(f"Overwriting existing {newbidsfile} file -- check your results carefully!")
                dcm2niixfile.replace(newbidsfile)

                # Rename all associated files (i.e. the json-, bval- and bvec-files)
                oldjsonfile = dcm2niixfile.with_suffix('').with_suffix('.json')
                newjsonfile = newbidsfile.with_suffix('').with_suffix('.json')
                if not oldjsonfile.is_file():
                    LOGGER.warning(f"Unexpected file conversion result: {oldjsonfile} not found")
                else:
                    if oldjsonfile in jsonfiles:
                        jsonfiles.remove(oldjsonfile)
                    if newjsonfile not in jsonfiles:
                        jsonfiles.append(newjsonfile)
                    for oldfile in outfolder.glob(dcm2niixfile.with_suffix('').stem + '.*'):
                        oldfile.replace(newjsonfile.with_suffix(''.join(oldfile.suffixes)))

        # Copy over the source meta-data
        metadata = bids.copymetadata(sourcefile, outfolder / bidsname, options.get('meta', []))

        # Loop over and adapt all the newly produced json sidecar-files and write to the scans.tsv file (NB: assumes every nifti-file comes with a json-file)
        for jsonfile in sorted(set(jsonfiles)):

            # Load the json meta-data
            with jsonfile.open('r') as json_fid:
                jsondata = json.load(json_fid)

            # Add all the source meta data to the meta-data. NB: Only warn if an existing value is really going to be changed
            for metakey, metaval in metadata.items():
                if metakey in jsondata and jsondata[metakey] != metaval:
                    LOGGER.warning(f"Replacing {metakey} values in {jsonfile}: {jsondata[metakey]} -> {metaval}")
                jsondata[metakey] = metaval

            # Add all the run meta data to the meta-data. NB: the dynamic `IntendedFor` value is handled separately later
            for metakey, metaval in run['meta'].items():
                if metakey != 'IntendedFor':
                    metaval = datasource.dynamicvalue(metaval, cleanup=False, runtime=True)
                try:
                    metaval = ast.literal_eval(str(metaval))    # Convert stringified literals (e.g. numbers or lists) back, if possible
                except (ValueError, SyntaxError):
                    pass
                LOGGER.info(f"Adding '{metakey}: {metaval}' to: {jsonfile}")
                if not metaval:
                    metaval = None
                jsondata[metakey] = metaval

            # Remove unused (but added from the template) B0FieldIdentifiers/Sources
            if not jsondata.get('B0FieldSource'):
                jsondata.pop('B0FieldSource', None)
            if not jsondata.get('B0FieldIdentifier'):
                jsondata.pop('B0FieldIdentifier', None)

            # Save the meta-data to the json sidecar-file
            with jsonfile.open('w') as json_fid:
                json.dump(jsondata, json_fid, indent=4)

            # Parse the acquisition time from the source header or else from the json file (NB: assuming the source file represents the first acquisition)
            outputfile = [file for file in jsonfile.parent.glob(jsonfile.stem + '.*') if file.suffix in ('.nii', '.gz')]   # Find the corresponding nifti/tsv.gz file (there should be only one, let's not make assumptions about the .gz extension)
            if not outputfile:
                LOGGER.error(f"No data-file found with {jsonfile} when updating {scans_tsv}")
            elif datasource.datatype not in bidsmap['Options']['bidscoin']['bidsignore'] and not run['bids']['suffix'] in bids.get_derivatives(datasource.datatype):
                acq_time = ''
                if dataformat == 'DICOM':
                    acq_time = f"{datasource.attributes('AcquisitionDate')}T{datasource.attributes('AcquisitionTime')}"
                elif dataformat == 'PAR':
                    acq_time = datasource.attributes('exam_date')
                if not acq_time or acq_time == 'T':
                    acq_time = f"1925-01-01T{jsondata.get('AcquisitionTime','')}"
                try:
                    acq_time = dateutil.parser.parse(acq_time)
                    if anonymize:
                        acq_time = acq_time.replace(year=1925, month=1, day=1)     # Privacy protection (see BIDS specification)
                    acq_time = acq_time.isoformat()
                except Exception as jsonerror:      # Deliberately best-effort: an unparsable time must not abort the conversion
                    LOGGER.warning(f"Could not parse the acquisition time from: {sourcefile}\n{jsonerror}")
                    acq_time = 'n/a'
                scanpath = outputfile[0].relative_to(bidsses)
                scans_table.loc[scanpath.as_posix(), 'acq_time'] = acq_time

    # Write the scans_table to disk
    LOGGER.info(f"Writing acquisition time data to: {scans_tsv}")
    scans_table.sort_values(by=['acq_time', 'filename'], inplace=True)
    scans_table.replace('', 'n/a').to_csv(scans_tsv, sep='\t', encoding='utf-8', na_rep='n/a')

    # Collect personal data from a source header (PAR/XML does not contain personal info)
    personals = {}
    if sesid:
        personals['session_id'] = sesid
    personals['age'] = ''
    if dataformat == 'DICOM':
        personals['age']    = _patientage_in_years(datasource.attributes('PatientAge'), anonymize)
        personals['sex']    = datasource.attributes('PatientSex')
        personals['size']   = datasource.attributes('PatientSize')
        personals['weight'] = datasource.attributes('PatientWeight')

    # Store the collected personals in the participants_table
    participants_tsv = bidsfolder / 'participants.tsv'
    if participants_tsv.is_file():
        participants_table = pd.read_csv(participants_tsv, sep='\t', dtype=str)
        participants_table.set_index(['participant_id'], verify_integrity=True, inplace=True)
    else:
        participants_table = pd.DataFrame()
        participants_table.index.name = 'participant_id'
    if subid in participants_table.index and 'session_id' in participants_table and participants_table.loc[subid, 'session_id']:
        return                                  # Only take data from the first session -> BIDS specification
    for key in personals:                       # TODO: Check that only values that are consistent over sessions go in the participants.tsv file, otherwise put them in a sessions.tsv file
        if key not in participants_table or participants_table[key].isnull().get(subid, True) or participants_table[key].get(subid) == 'n/a':
            participants_table.loc[subid, key] = personals[key]

    # Write the collected data to the participants tsv-file
    LOGGER.info(f"Writing {subid} subject data to: {participants_tsv}")
    participants_table.replace('', 'n/a').to_csv(participants_tsv, sep='\t', encoding='utf-8', na_rep='n/a')
def coin_data2bids(dataformat: str, session: Path, bidsmap: dict, bidsfolder: Path, personals: dict, subprefix: str, sesprefix: str) -> None: """ Converts the session source-files into BIDS-valid nifti-files in the corresponding bidsfolder and extracts personals (e.g. Age, Sex) from the source header :param dataformat: The format of the raw input data that is to be coined (e.g. 'DICOM' or 'PAR', see bids.get_dataformat) :param session: The full-path name of the subject/session source file/folder :param bidsmap: The full mapping heuristics from the bidsmap YAML-file :param bidsfolder: The full-path name of the BIDS root-folder :param personals: The dictionary with the personal information :param subprefix: The prefix common for all source subject-folders :param sesprefix: The prefix common for all source session-folders :return: Nothing """ # Get valid BIDS subject/session identifiers from the (first) DICOM- or PAR/XML source file if dataformat == 'DICOM': sourcefile = Path() sources = bids.lsdirs(session) for source in sources: sourcefile = bids.get_dicomfile(source) manufacturer = bids.get_dicomfield('Manufacturer', sourcefile) if sourcefile.name: break elif dataformat == 'PAR': sources = bids.get_parfiles(session) manufacturer = 'Philips Medical Systems' if sources: sourcefile = sources[0] else: LOGGER.error( f"Unsupported data format: {dataformat}\nPlease report this bug") return if not sources: LOGGER.info(f"No data found for: {session}") return subid, sesid = bids.get_subid_sesid(sourcefile, bidsmap[dataformat]['subject'], bidsmap[dataformat]['session'], subprefix, sesprefix) if subid == subprefix: LOGGER.error(f"No valid subject identifier found for: {session}") return # Create the BIDS session-folder and a scans.tsv file bidsses = bidsfolder / subid / sesid if bidsses.is_dir(): LOGGER.warning( f"Existing BIDS output-directory found, which may result in duplicate data (with increased run-index). 
Make sure {bidsses} was cleaned-up from old data before (re)running the bidscoiner" ) bidsses.mkdir(parents=True, exist_ok=True) scans_tsv = bidsses / f"{subid}{bids.add_prefix('_',sesid)}_scans.tsv" if scans_tsv.is_file(): scans_table = pd.read_csv(scans_tsv, sep='\t', index_col='filename') else: scans_table = pd.DataFrame(columns=['acq_time'], dtype='str') scans_table.index.name = 'filename' # Process all the source files or run subfolders for source in sources: # Get a source-file if dataformat == 'DICOM': sourcefile = bids.get_dicomfile(source) elif dataformat == 'PAR': sourcefile = source if not sourcefile.name: continue # Get a matching run from the bidsmap run, datatype, index = bids.get_matching_run(sourcefile, bidsmap, dataformat) # Check if we should ignore this run if datatype == bids.ignoredatatype: LOGGER.info(f"Leaving out: {source}") continue # Check if we already know this run if index is None: LOGGER.error( f"Skipping unknown '{datatype}' run: {sourcefile}\n-> Re-run the bidsmapper and delete {bidsses} to solve this warning" ) continue LOGGER.info(f"Processing: {source}") # Create the BIDS session/datatype output folder if run['bids']['suffix'] in bids.get_derivatives(datatype): outfolder = bidsfolder / 'derivatives' / manufacturer.replace( ' ', '') / subid / sesid / datatype else: outfolder = bidsses / datatype outfolder.mkdir(parents=True, exist_ok=True) # Compose the BIDS filename using the matched run bidsname = bids.get_bidsname(subid, sesid, run) runindex = run['bids'].get('run', '') if runindex.startswith('<<') and runindex.endswith('>>'): bidsname = bids.increment_runindex(outfolder, bidsname) jsonfiles = [ (outfolder / bidsname).with_suffix('.json') ] # List -> Collect the associated json-files (for updating them later) -- possibly > 1 # Check if file already exists (-> e.g. 
when a static runindex is used) if (outfolder / bidsname).with_suffix('.json').is_file(): LOGGER.warning( f"{outfolder/bidsname}.* already exists and will be deleted -- check your results carefully!" ) for ext in ('.nii.gz', '.nii', '.json', '.bval', '.bvec', 'tsv.gz'): (outfolder / bidsname).with_suffix(ext).unlink(missing_ok=True) # Convert physiological log files (dcm2niix can't handle these) if run['bids']['suffix'] == 'physio': if bids.get_dicomfile(source, 2).name: LOGGER.warning( f"Found > 1 DICOM file in {source}, using: {sourcefile}") physiodata = physio.readphysio(sourcefile) physio.physio2tsv(physiodata, outfolder / bidsname) # Convert the source-files in the run folder to nifti's in the BIDS-folder else: command = '{path}dcm2niix {args} -f "{filename}" -o "{outfolder}" "{source}"'.format( path=bidsmap['Options']['dcm2niix']['path'], args=bidsmap['Options']['dcm2niix']['args'], filename=bidsname, outfolder=outfolder, source=source) if not bids.run_command(command): continue # Replace uncropped output image with the cropped one if '-x y' in bidsmap['Options']['dcm2niix']['args']: for dcm2niixfile in sorted( outfolder.glob(bidsname + '*_Crop_*')): # e.g. *_Crop_1.nii.gz ext = ''.join(dcm2niixfile.suffixes) newbidsfile = str(dcm2niixfile).rsplit(ext, 1)[0].rsplit( '_Crop_', 1)[0] + ext LOGGER.info( f"Found dcm2niix _Crop_ postfix, replacing original file\n{dcm2niixfile} ->\n{newbidsfile}" ) dcm2niixfile.replace(newbidsfile) # Rename all files that got additional postfixes from dcm2niix. 
See: https://github.com/rordenlab/dcm2niix/blob/master/FILENAMING.md dcm2niixpostfixes = ('_c', '_i', '_Eq', '_real', '_imaginary', '_MoCo', '_t', '_Tilt', '_e', '_ph') dcm2niixfiles = sorted( set([ dcm2niixfile for dcm2niixpostfix in dcm2niixpostfixes for dcm2niixfile in outfolder.glob( f"{bidsname}*{dcm2niixpostfix}*") ])) for dcm2niixfile in dcm2niixfiles: ext = ''.join(dcm2niixfile.suffixes) postfixes = str(dcm2niixfile).split(bidsname)[1].rsplit( ext)[0].split('_')[1:] newbidsname = dcm2niixfile.name # Strip the additional postfixes and assign them to bids entities in the for-loop below for postfix in postfixes: # dcm2niix postfixes _c%d, _e%d and _ph (and any combination of these in that order) are for multi-coil data, multi-echo data and phase data # Patch the echo entity in the newbidsname with the dcm2niix echo info # NB: We can't rely on the bids-entity info here because manufacturers can e.g. put multiple echos in one series / run-folder if postfix[0] == 'e' and bids.get_bidsvalue( newbidsname, 'echo' ): # NB: Check if postfix[0]=='e' uniquely refers to the right dcm2niixpostfix echonr = f"_{postfix}" # E.g. echonr='_e1' or echonr='_pha' for dcm2niixpostfix in dcm2niixpostfixes: echonr = echonr.replace( dcm2niixpostfix, '' ) # Strip the dcm2niixpostfix to keep the echonr info. E.g. [echonr='_e1' or echonr='_pha'] -> [echonr='1' or echonr='a'] if echonr.isalpha(): echonr = ord( echonr ) - 95 # dcm2niix adds an alphabetically ordered character if it outputs more than one image with the same name. Convert character to echo-number: '' -> 1, 'a'->2, etc elif not echonr: echonr = 1 newbidsname = bids.get_bidsvalue( newbidsname, 'echo', str(echonr) ) # In contrast to other labels, run and echo labels MUST be integers. 
Those labels MAY include zero padding, but this is NOT RECOMMENDED to maintain their uniqueness # Patch fieldmap images (NB: datatype=='fmap' is too broad, see the fmap.yaml file) elif run['bids']['suffix'] in ('magnitude', 'magnitude1', 'magnitude2', 'phase1', 'phase2', 'phasediff', 'fieldmap'): if len(dcm2niixfiles) not in ( 0, 2, 4, 6, 8 ): # Phase / echo data may be stored in the same data source / run folder LOGGER.warning( f"Unknown fieldmap {outfolder/bidsname} for '{postfix}'" ) newbidsname = newbidsname.replace( '_fieldmap_ph', '_fieldmap') newbidsname = newbidsname.replace( '_magnitude_e1', '_magnitude') newbidsname = newbidsname.replace( '_magnitude_ph', '_fieldmap') newbidsname = newbidsname.replace( '_magnitude1_e1', '_magnitude1') newbidsname = newbidsname.replace( '_magnitude2_e1', '_magnitude1' ) # This can happen when the e2 image is stored in the same directory as the e1 image, but with the e2 listed first newbidsname = newbidsname.replace( '_magnitude1_e2', '_magnitude2') if len(dcm2niixfiles) == 8: newbidsname = newbidsname.replace( '_magnitude1_ph', '_phase1' ) # Two magnitude + 2 phase images in one folder / datasource else: newbidsname = newbidsname.replace( '_magnitude1_ph', '_phasediff' ) # One or two magnitude + 1 phasediff image newbidsname = newbidsname.replace( '_magnitude1a', '_magnitude2') newbidsname = newbidsname.replace( '_magnitude1_pha', '_phase2') newbidsname = newbidsname.replace( '_magnitude2_e2', '_magnitude2') newbidsname = newbidsname.replace( '_magnitude2_ph', '_phase2') newbidsname = newbidsname.replace( '_phase1_e1', '_phase1') newbidsname = newbidsname.replace( '_phase2_e1', '_phase1' ) # This can happen when the e2 image is stored in the same directory as the e1 image, but with the e2 listed first newbidsname = newbidsname.replace( '_phase1_ph', '_phase1') newbidsname = newbidsname.replace( '_phase1_e2', '_phase2') newbidsname = newbidsname.replace( '_phase2_e2', '_phase2') newbidsname = newbidsname.replace( 
'_phase2_ph', '_phase2') # Append the dcm2niix info to acq-label, may need to be improved / elaborated for future BIDS standards, supporting multi-coil data else: newbidsname = bids.get_bidsvalue( newbidsname, 'dummy', postfix) # Remove the added postfix from the new bidsname newbidsname = newbidsname.replace(f"_{postfix}_", '_') # If it is not last newbidsname = newbidsname.replace(f"_{postfix}.", '.') # If it is last # Save the file with a new name if runindex.startswith('<<') and runindex.endswith('>>'): newbidsname = bids.increment_runindex( outfolder, newbidsname, '' ) # Update the runindex now that the acq-label has changed newbidsfile = outfolder / newbidsname LOGGER.info( f"Found dcm2niix {postfixes} postfixes, renaming\n{dcm2niixfile} ->\n{newbidsfile}" ) if newbidsfile.is_file(): LOGGER.warning( f"Overwriting existing {newbidsfile} file -- check your results carefully!" ) dcm2niixfile.replace(newbidsfile) if ext == '.json': oldjsonfile = (outfolder / bidsname).with_suffix('.json') if oldjsonfile in jsonfiles and not oldjsonfile.is_file(): jsonfiles.remove( (outfolder / bidsname).with_suffix('.json')) jsonfiles.append(newbidsfile) # Loop over and adapt all the newly produced json files and write to the scans.tsv file (every nifti-file comes with a json-file) for jsonfile in sorted(set(jsonfiles)): # Check if dcm2niix behaved as expected if not jsonfile.is_file(): LOGGER.error( f"Unexpected file conversion result: {jsonfile} not found") continue # Add a dummy b0 bval- and bvec-file for any file without a bval/bvec file (e.g. 
sbref, b0 scans) if datatype == 'dwi': bvecfile = jsonfile.with_suffix('.bvec') bvalfile = jsonfile.with_suffix('.bval') if not bvecfile.is_file(): LOGGER.info(f"Adding dummy bvec file: {bvecfile}") with bvecfile.open('w') as bvec_fid: bvec_fid.write('0\n0\n0\n') if not bvalfile.is_file(): LOGGER.info(f"Adding dummy bval file: {bvalfile}") with bvalfile.open('w') as bval_fid: bval_fid.write('0\n') # Add the TaskName to the func json-file elif datatype == 'func': with jsonfile.open('r') as json_fid: data = json.load(json_fid) if not 'TaskName' in data: LOGGER.info(f"Adding TaskName to: {jsonfile}") data['TaskName'] = run['bids']['task'] with jsonfile.open('w') as json_fid: json.dump(data, json_fid, indent=4) # Parse the acquisition time from the json file or else from the source header (NB: assuming the source file represents the first acquisition) niifile = list( jsonfile.parent.glob(jsonfile.stem + '.nii*') ) # Find the corresponding nifti file (there should be only one, let's not make assumptions about the .gz extension) if niifile and datatype not in bidsmap['Options']['bidscoin'][ 'bidsignore'] and not run['bids'][ 'suffix'] in bids.get_derivatives(datatype): with jsonfile.open('r') as json_fid: data = json.load(json_fid) if 'AcquisitionTime' not in data or not data['AcquisitionTime']: data['AcquisitionTime'] = bids.get_sourcefield( 'AcquisitionTime', sourcefile) # DICOM if not data['AcquisitionTime']: data['AcquisitionTime'] = bids.get_sourcefield( 'exam_date', sourcefile) # PAR/XML try: acq_time = dateutil.parser.parse(data['AcquisitionTime']) except: LOGGER.warning( f"Could not parse the acquisition time from: '{data['AcquisitionTime']}' in {sourcefile}" ) acq_time = dateutil.parser.parse('00:00:00') scanpath = niifile[0].relative_to(bidsses) scans_table.loc[ scanpath.as_posix(), 'acq_time'] = '1925-01-01T' + acq_time.strftime('%H:%M:%S') # Write the scans_table to disk LOGGER.info(f"Writing acquisition time data to: {scans_tsv}") 
scans_table.sort_values(by=['acq_time', 'filename'], inplace=True) scans_table.to_csv(scans_tsv, sep='\t', encoding='utf-8') # Add IntendedFor and TE1+TE2 meta-data to the fieldmap json-files. This has been postponed untill all datatypes have been processed (i.e. so that all target images are indeed on disk) if bidsmap[dataformat]['fmap'] is not None: for fieldmap in bidsmap[dataformat]['fmap']: bidsname = bids.get_bidsname(subid, sesid, fieldmap) niifiles = [] intendedfor = fieldmap['bids']['IntendedFor'] # Search for the imaging files that match the IntendedFor search criteria if intendedfor: if intendedfor.startswith('<<') and intendedfor.endswith('>>'): intendedfor = intendedfor[2:-2].split('><') elif not isinstance(intendedfor, list): intendedfor = [intendedfor] for selector in intendedfor: niifiles.extend( [ Path(niifile).relative_to(bidsfolder / subid) for niifile in sorted( bidsses.rglob(f"*{selector}*.nii*")) if selector ] ) # Search in all runs using a relative path to the subject folder else: intendedfor = [] # Get the set of json-files (account for multiple runs in one data source and dcm2niix postfixes inserted into the acquisition label) jsonfiles = [] acqlabel = bids.get_bidsvalue(bidsname, 'acq') patterns = (bidsname.replace('_run-1_', '_run-[0-9]*_').replace( '_magnitude1', '_magnitude*').replace('_magnitude2', '_magnitude*').replace( '_phase1', '_phase*').replace('_phase2', '_phase*'), bidsname.replace('_run-1_', '_run-[0-9]*_').replace( '_magnitude1', '_phase*').replace('_magnitude2', '_phase*')) for pattern in patterns: jsonfiles.extend((bidsses / 'fmap').glob(pattern + '.json')) if acqlabel: cepattern = bids.get_bidsvalue(pattern, 'acq', acqlabel + '[CE][0-9]*') jsonfiles.extend( list((bidsses / 'fmap').glob(cepattern + '.json'))) # Save the meta-data in the jsonfiles for jsonfile in sorted(set(jsonfiles)): # Add the IntendedFor data with jsonfile.open('r') as json_fid: data = json.load(json_fid) if 'IntendedFor' not in data: if niifiles: 
LOGGER.info(f"Adding IntendedFor to: {jsonfile}") elif intendedfor: LOGGER.warning( f"Empty 'IntendedFor' fieldmap value in {jsonfile}: the search for {intendedfor} gave no results" ) else: LOGGER.warning( f"Empty 'IntendedFor' fieldmap value in {jsonfile}: the IntendedFor value of the bidsmap entry was empty" ) data['IntendedFor'] = [ niifile.as_posix() for niifile in niifiles ] # The path needs to use forward slashes instead of backward slashes with jsonfile.open('w') as json_fid: json.dump(data, json_fid, indent=4) # Extract the echo times from magnitude1 and magnitude2 and add them to the phasediff json-file if jsonfile.name.endswith('phasediff.json'): json_magnitude = [None, None] TE = [None, None] for n in (0, 1): json_magnitude[ n] = jsonfile.parent / jsonfile.name.replace( '_phasediff', f"_magnitude{n+1}") if not json_magnitude[n].is_file(): LOGGER.error( f"Could not find expected magnitude{n+1} image associated with: {jsonfile}" ) else: with json_magnitude[n].open('r') as json_fid: data = json.load(json_fid) TE[n] = data['EchoTime'] if None in TE: LOGGER.error( f"Cannot find and add valid EchoTime1={TE[0]} and EchoTime2={TE[1]} data to: {jsonfile}" ) elif TE[0] > TE[1]: LOGGER.error( f"Found invalid EchoTime1={TE[0]} > EchoTime2={TE[1]} for: {jsonfile}" ) else: with jsonfile.open('r') as json_fid: data = json.load(json_fid) data['EchoTime1'] = TE[0] data['EchoTime2'] = TE[1] LOGGER.info( f"Adding EchoTime1: {TE[0]} and EchoTime2: {TE[1]} to {jsonfile}" ) with jsonfile.open('w') as json_fid: json.dump(data, json_fid, indent=4) # Collect personal data from a source header (PAR/XML does not contain personal info) if dataformat == 'DICOM' and sourcefile.name: personals['participant_id'] = subid if sesid: if 'session_id' not in personals: personals['session_id'] = sesid else: return # Only take data from the first session -> BIDS specification age = bids.get_dicomfield( 'PatientAge', sourcefile ) # A string of characters with one of the following formats: nnnD, 
nnnW, nnnM, nnnY if age.endswith('D'): personals['age'] = str(int(float(age.rstrip('D')) / 365.2524)) elif age.endswith('W'): personals['age'] = str(int(float(age.rstrip('W')) / 52.1775)) elif age.endswith('M'): personals['age'] = str(int(float(age.rstrip('M')) / 12)) elif age.endswith('Y'): personals['age'] = str(int(float(age.rstrip('Y')))) elif age: personals['age'] = age personals['sex'] = bids.get_dicomfield('PatientSex', sourcefile) personals['size'] = bids.get_dicomfield('PatientSize', sourcefile) personals['weight'] = bids.get_dicomfield('PatientWeight', sourcefile)
def coin_data2bids(dataformat: str, session: Path, bidsmap: dict, bidsfolder: Path, personals: dict, subprefix: str, sesprefix: str) -> None:
    """
    Converts the session source-files into BIDS-valid nifti-files in the corresponding bidsfolder and
    extracts personals (e.g. Age, Sex) from the source header

    The function returns early (after logging) if the data format is unsupported, if no source data is found
    or if no valid subject identifier can be derived from the source data.

    :param dataformat:  The format of the raw input data that is to be coined (e.g. 'DICOM' or 'PAR', see bids.get_dataformat)
    :param session:     The full-path name of the subject/session source file/folder
    :param bidsmap:     The full mapping heuristics from the bidsmap YAML-file
    :param bidsfolder:  The full-path name of the BIDS root-folder
    :param personals:   The dictionary with the personal information (updated in place)
    :param subprefix:   The prefix common for all source subject-folders
    :param sesprefix:   The prefix common for all source session-folders
    :return:            Nothing
    """

    # Get valid BIDS subject/session identifiers from the (first) DICOM- or PAR/XML source file
    if dataformat=='DICOM':
        sourcefile = Path()
        sources    = bids.lsdirs(session)
        for source in sources:
            sourcefile = bids.get_dicomfile(source)
            if sourcefile.name:
                break

    elif dataformat=='PAR':
        sources = bids.get_parfiles(session)
        if sources:
            sourcefile = sources[0]

    else:
        LOGGER.error(f"Unsupported data format: {dataformat}\nPlease report this bug")
        return

    if not sources:
        LOGGER.info(f"No data found for: {session}")
        return

    subid, sesid = bids.get_subid_sesid(sourcefile,
                                        bidsmap[dataformat]['subject'],
                                        bidsmap[dataformat]['session'],
                                        subprefix, sesprefix)
    if subid == subprefix:
        LOGGER.error(f"No valid subject identifier found for: {session}")
        return

    # Create the BIDS session-folder and a scans.tsv file
    bidsses = bidsfolder/subid/sesid
    if bidsses.is_dir():
        LOGGER.warning(f"Existing BIDS output-directory found, which may result in duplicate data (with increased run-index). Make sure {bidsses} was cleaned-up from old data before (re)running the bidscoiner")
    bidsses.mkdir(parents=True, exist_ok=True)
    scans_tsv = bidsses/f"{subid}{bids.add_prefix('_',sesid)}_scans.tsv"
    if scans_tsv.is_file():
        scans_table = pd.read_csv(scans_tsv, sep='\t', index_col='filename')
    else:
        scans_table = pd.DataFrame(columns=['acq_time'], dtype='str')
        scans_table.index.name = 'filename'

    # Process all the source files or run subfolders
    for source in sources:

        # Get a source-file
        if dataformat=='DICOM':
            sourcefile = bids.get_dicomfile(source)
        elif dataformat=='PAR':
            sourcefile = source
        if not sourcefile.name:
            continue

        # Get a matching run from the bidsmap
        run, modality, index = bids.get_matching_run(sourcefile, bidsmap, dataformat)

        # Check if we should ignore this run
        if modality == bids.ignoremodality:
            LOGGER.info(f"Leaving out: {source}")
            continue

        # Check if we already know this run
        if index is None:
            LOGGER.error(f"Skipping unknown '{modality}' run: {sourcefile}\n-> Re-run the bidsmapper and delete {bidsses} to solve this warning")
            continue

        LOGGER.info(f"Processing: {source}")

        # Create the BIDS session/modality folder
        bidsmodality = bidsses/modality
        bidsmodality.mkdir(parents=True, exist_ok=True)

        # Compose the BIDS filename using the matched run
        bidsname = bids.get_bidsname(subid, sesid, modality, run)
        runindex = run['bids']['run']
        if runindex.startswith('<<') and runindex.endswith('>>'):
            bidsname = bids.increment_runindex(bidsmodality, bidsname)

        # Check if file already exists (-> e.g. when a static runindex is used). TODO: Future dcm2niix versions may contain a `-w 1` option: https://github.com/rordenlab/dcm2niix/issues/276
        if (bidsmodality/bidsname).with_suffix('.json').is_file():
            LOGGER.warning(f"{bidsmodality/bidsname}.* already exists and will be deleted -- check your results carefully!")
            for ext in ('.nii.gz', '.nii', '.json', '.bval', '.bvec'):
                (bidsmodality/bidsname).with_suffix(ext).unlink(missing_ok=True)

        # Convert the source-files in the run folder to nifti's in the BIDS-folder
        command = '{path}dcm2niix {args} -f "{filename}" -o "{outfolder}" "{source}"'.format(
            path      = bidsmap['Options']['dcm2niix']['path'],
            args      = bidsmap['Options']['dcm2niix']['args'],
            filename  = bidsname,
            outfolder = bidsmodality,
            source    = source)
        if not bids.run_command(command):
            continue

        # Replace uncropped output image with the cropped one
        if '-x y' in bidsmap['Options']['dcm2niix']['args']:
            for filename in sorted(bidsmodality.glob(bidsname + '*_Crop_*')):       # e.g. *_Crop_1.nii.gz
                ext         = ''.join(filename.suffixes)
                newfilename = str(filename).rsplit(ext,1)[0].rsplit('_Crop_',1)[0] + ext
                LOGGER.info(f"Found dcm2niix _Crop_ suffix, replacing original file\n{filename} ->\n{newfilename}")
                filename.replace(newfilename)

        # Rename all files ending with _c%d, _e%d and _ph (and any combination of these) that are added by dcm2niix for multi-coil data, multi-echo data and phase data
        # See: https://github.com/rordenlab/dcm2niix/blob/master/FILENAMING.md
        jsonfiles = []                                                              # Collect the associated json-files (for updating them later) -- possibly > 1
        for dcm2niisuffix in ('_c', '_e', '_ph', '_i', '_Eq', '_real', '_imaginary', '_MoCo', '_t', '_Tilt'):
            for filename in sorted(bidsmodality.glob(f"{bidsname}*{dcm2niisuffix}*")):
                ext             = ''.join(filename.suffixes)
                basepath, index = str(filename).rsplit(ext)[0].rsplit(dcm2niisuffix,1)    # basepath = the name without the added stuff (i.e. bidsmodality/bidsname), index = added dcm2niix index (e.g. _c1 -> index=1)
                basesuffix      = basepath.rsplit('_',1)[1]                         # The BIDS suffix, e.g. basepath = *_magnitude1 -> basesuffix=magnitude1
                index           = index.split('_')[0].zfill(2)                      # Zero padd as specified in the BIDS-standard (assuming two digits is sufficient); strip following suffices (fieldmaps produce *_e2_ph files)

                # Phase data may be stored in the magnitude data source (e.g. Philips fieldmaps)
                if 'ph' in filename.name.rsplit(ext)[0].split('_'):
                    basepath = basepath.replace('_magnitude', '_phase')

                # This is a special hack: dcm2niix does not always add a _c/_e suffix for the first(?) coil/echo image -> add it when we encounter a **_e2/_c2 file
                # https://github.com/rordenlab/dcm2niix/issues/381
                if dcm2niisuffix in ('_c','_e') and int(index)==2 and basesuffix not in ['magnitude1', 'phase1']:    # For fieldmaps: *_magnitude1_e[index] -> *_magnitude[index] (This is handled below)
                    filename_ce = Path(basepath + ext)                              # The file without the _c1/_e1 suffix
                    if dcm2niisuffix=='_e' and bids.get_bidsvalue(basepath, 'echo'):
                        newbasepath_ce = Path(bids.get_bidsvalue(basepath, 'echo', '1'))
                    else:
                        newbasepath_ce = Path(bids.get_bidsvalue(basepath, 'dummy', dcm2niisuffix.upper()[1:] + '1'.zfill(len(index))))    # --> append to acq-label, may need to be elaborated for future BIDS standards, supporting multi-coil data
                    newfilename_ce = newbasepath_ce.with_suffix(ext)                # The file as it should have been
                    if filename_ce.is_file():
                        if filename_ce != newfilename_ce:
                            LOGGER.warning(f"Found no dcm2niix {dcm2niisuffix} suffix for image instance 1, renaming\n{filename_ce} ->\n{newfilename_ce}\nConsider upgrading dcm2niix: https://github.com/rordenlab/dcm2niix/issues/381")
                            if newfilename_ce.is_file():
                                LOGGER.warning(f"Overwriting existing {newfilename_ce} file -- check your results carefully!")
                            filename_ce.replace(newfilename_ce)
                        if ext == '.json':
                            jsonfiles.append(newbasepath_ce.with_suffix('.json'))

                # Patch the basepath with the dcm2niix suffix info (we can't rely on the basepath info here because Siemens can e.g. put multiple echos in one series / run-folder)
                if dcm2niisuffix=='_e' and bids.get_bidsvalue(basepath, 'echo') and index:
                    basepath = bids.get_bidsvalue(basepath, 'echo', str(int(index)))    # In contrast to other labels, run and echo labels MUST be integers. Those labels MAY include zero padding, but this is NOT RECOMMENDED to maintain their uniqueness

                elif dcm2niisuffix=='_e' and basesuffix in ('magnitude1','magnitude2','phase1','phase2') and index:    # i.e. modality == 'fmap'
                    basepath = basepath[0:-1] + str(int(index))                     # basepath: *_magnitude1_e[index] -> *_magnitude[index] and *_phase1_e[index]_ph -> *_phase[index]

                elif dcm2niisuffix=='_e' and basesuffix=='phasediff' and index:     # i.e. modality == 'fmap'
                    pass

                else:
                    basepath = bids.get_bidsvalue(basepath, 'dummy', dcm2niisuffix.upper()[1:] + index)    # --> append to acq-label, may need to be elaborated for future BIDS standards, supporting multi-coil data

                # Save the file with a new name
                newbidsname = str(Path(basepath).name)
                if runindex.startswith('<<') and runindex.endswith('>>'):
                    newbidsname = bids.increment_runindex(bidsmodality, newbidsname, ext)    # Update the runindex now that the acq-label has changed
                newfilename = (bidsmodality/newbidsname).with_suffix(ext)
                LOGGER.info(f"Found dcm2niix {dcm2niisuffix} suffix, renaming\n{filename} ->\n{newfilename}")
                if newfilename.is_file():
                    LOGGER.warning(f"Overwriting existing {newfilename} file -- check your results carefully!")
                filename.replace(newfilename)
                if ext == '.json':
                    jsonfiles.append((bidsmodality/newbidsname).with_suffix('.json'))

        # Loop over and adapt all the newly produced json files and write to the scans.tsv file (every nifti-file comes with a json-file)
        if not jsonfiles:
            jsonfiles = [(bidsmodality/bidsname).with_suffix('.json')]
        for jsonfile in sorted(set(jsonfiles)):                                     # Sorted, so that the processing (and logging) order is reproducible

            # Check if dcm2niix behaved as expected
            if not jsonfile.is_file():
                LOGGER.error(f"Unexpected file conversion result: {jsonfile} not found")
                continue

            # Add a dummy b0 bval- and bvec-file for any file without a bval/bvec file (e.g. sbref, b0 scans)
            if modality == 'dwi':
                bvecfile = jsonfile.with_suffix('.bvec')
                bvalfile = jsonfile.with_suffix('.bval')
                if not bvecfile.is_file():
                    LOGGER.info(f"Adding dummy bvec file: {bvecfile}")
                    with bvecfile.open('w') as bvec_fid:
                        bvec_fid.write('0\n0\n0\n')
                if not bvalfile.is_file():
                    LOGGER.info(f"Adding dummy bval file: {bvalfile}")
                    with bvalfile.open('w') as bval_fid:
                        bval_fid.write('0\n')

            # Add the TaskName to the func json-file
            elif modality == 'func':
                with jsonfile.open('r') as json_fid:
                    data = json.load(json_fid)
                if 'TaskName' not in data:
                    LOGGER.info(f"Adding TaskName to: {jsonfile}")
                    data['TaskName'] = run['bids']['task']
                    with jsonfile.open('w') as json_fid:
                        json.dump(data, json_fid, indent=4)

            # Parse the acquisition time from the json file or else from the source header (NB: assuming the source file represents the first acquisition)
            if bidsmodality.name not in bidsmap['Options']['bidscoin']['bidsignore']:

                # Find the corresponding nifti file (there should be only one, let's not make assumptions about the .gz extension)
                scanfiles = sorted(jsonfile.parent.glob(jsonfile.stem + '.nii*'))
                if not scanfiles:                                                   # Without this guard a json-file without a nifti-file would abort the whole session with an IndexError
                    LOGGER.warning(f"No nifti file found for {jsonfile}, not adding it to: {scans_tsv}")
                    continue

                with jsonfile.open('r') as json_fid:
                    data = json.load(json_fid)
                if 'AcquisitionTime' not in data or not data['AcquisitionTime']:
                    data['AcquisitionTime'] = bids.get_sourcefield('AcquisitionTime', sourcefile)   # DICOM
                if not data['AcquisitionTime']:
                    data['AcquisitionTime'] = bids.get_sourcefield('exam_date', sourcefile)         # PAR/XML
                try:
                    acq_time = dateutil.parser.parse(data['AcquisitionTime'])
                except Exception:                                                   # Best effort: fall back to midnight, but do not swallow KeyboardInterrupt / SystemExit
                    LOGGER.warning(f"Could not parse the acquisition time from: '{data['AcquisitionTime']}' in {sourcefile}")
                    acq_time = dateutil.parser.parse('00:00:00')
                scanpath = scanfiles[0].relative_to(bidsses)
                scans_table.loc[scanpath.as_posix(), 'acq_time'] = '1925-01-01T' + acq_time.strftime('%H:%M:%S')    # Only the time of day is kept, the date is replaced by a fixed dummy date

    # Write the scans_table to disk
    LOGGER.info(f"Writing acquisition time data to: {scans_tsv}")
    scans_table.sort_values(by=['acq_time','filename'], inplace=True)
    scans_table.to_csv(scans_tsv, sep='\t', encoding='utf-8')

    # Add IntendedFor and TE1+TE2 meta-data to the fieldmap json-files. This has been postponed until all modalities have been processed (i.e. so that all target images are indeed on disk)
    if bidsmap[dataformat]['fmap'] is not None:
        for fieldmap in bidsmap[dataformat]['fmap']:
            bidsname    = bids.get_bidsname(subid, sesid, 'fmap', fieldmap)
            niifiles    = []
            intendedfor = fieldmap['bids']['IntendedFor']

            # Search for the imaging files that match the IntendedFor search criteria
            if intendedfor:
                if intendedfor.startswith('<<') and intendedfor.endswith('>>'):
                    intendedfor = intendedfor[2:-2].split('><')
                elif not isinstance(intendedfor, list):
                    intendedfor = [intendedfor]
                for selector in intendedfor:
                    niifiles.extend([Path(niifile).relative_to(bidsfolder/subid) for niifile in sorted(bidsses.rglob(f"*{selector}*.nii*")) if selector])    # Search in all runs using a relative path to the subject folder
            else:
                intendedfor = []

            # Get the set of json-files (account for multiple runs in one data source and dcm2niix suffixes inserted into the acquisition label)
            jsonfiles = []
            acqlabel  = bids.get_bidsvalue(bidsname, 'acq')
            patterns  = (bidsname.replace('_run-1_',     '_run-[0-9]*_').
                                  replace('_magnitude1', '_magnitude*').
                                  replace('_magnitude2', '_magnitude*').
                                  replace('_phase1',     '_phase*').
                                  replace('_phase2',     '_phase*'),
                         bidsname.replace('_run-1_',     '_run-[0-9]*_').
                                  replace('_magnitude1', '_phase*').
                                  replace('_magnitude2', '_phase*'))
            for pattern in patterns:
                jsonfiles.extend((bidsses/'fmap').glob(pattern + '.json'))
                if acqlabel:
                    cepattern = bids.get_bidsvalue(pattern, 'acq', acqlabel + '[CE][0-9]*')
                    jsonfiles.extend(list((bidsses/'fmap').glob(cepattern + '.json')))

            # Save the meta-data in the jsonfiles
            for jsonfile in sorted(set(jsonfiles)):

                # Add the IntendedFor data
                with jsonfile.open('r') as json_fid:
                    data = json.load(json_fid)
                if 'IntendedFor' not in data:
                    if niifiles:
                        LOGGER.info(f"Adding IntendedFor to: {jsonfile}")
                    elif intendedfor:
                        LOGGER.warning(f"Empty 'IntendedFor' fieldmap value in {jsonfile}: the search for {intendedfor} gave no results")
                    else:
                        LOGGER.warning(f"Empty 'IntendedFor' fieldmap value in {jsonfile}: the IntendedFor value of the bidsmap entry was empty")
                    data['IntendedFor'] = [niifile.as_posix() for niifile in niifiles]    # The path needs to use forward slashes instead of backward slashes
                    with jsonfile.open('w') as json_fid:
                        json.dump(data, json_fid, indent=4)

                # Extract the echo times from magnitude1 and magnitude2 and add them to the phasediff json-file
                if jsonfile.name.endswith('phasediff.json'):
                    json_magnitude = [None, None]
                    TE             = [None, None]
                    for n in (0,1):
                        json_magnitude[n] = jsonfile.parent/jsonfile.name.replace('_phasediff', f"_magnitude{n+1}")
                        if not json_magnitude[n].is_file():
                            LOGGER.error(f"Could not find expected magnitude{n+1} image associated with: {jsonfile}")
                        else:
                            with json_magnitude[n].open('r') as json_fid:
                                data = json.load(json_fid)
                            TE[n] = data['EchoTime']
                    if None in TE:
                        LOGGER.error(f"Cannot find and add valid EchoTime1={TE[0]} and EchoTime2={TE[1]} data to: {jsonfile}")
                    elif TE[0] > TE[1]:
                        LOGGER.error(f"Found invalid EchoTime1={TE[0]} > EchoTime2={TE[1]} for: {jsonfile}")
                    else:
                        with jsonfile.open('r') as json_fid:
                            data = json.load(json_fid)
                        data['EchoTime1'] = TE[0]
                        data['EchoTime2'] = TE[1]
                        LOGGER.info(f"Adding EchoTime1: {TE[0]} and EchoTime2: {TE[1]} to {jsonfile}")
                        with jsonfile.open('w') as json_fid:
                            json.dump(data, json_fid, indent=4)

    # Collect personal data from a source header (PAR/XML does not contain personal info)
    if dataformat=='DICOM' and sourcefile.name:
        personals['participant_id'] = subid
        if sesid:
            if 'session_id' not in personals:
                personals['session_id'] = sesid
            else:
                return                                                              # Only from the first session -> BIDS specification
        age = bids.get_dicomfield('PatientAge', sourcefile)                         # A string of characters with one of the following formats: nnnD, nnnW, nnnM, nnnY
        if age.endswith('D'):
            personals['age'] = str(int(float(age.rstrip('D'))/365.2524))
        elif age.endswith('W'):
            personals['age'] = str(int(float(age.rstrip('W'))/52.1775))
        elif age.endswith('M'):
            personals['age'] = str(int(float(age.rstrip('M'))/12))
        elif age.endswith('Y'):
            personals['age'] = str(int(float(age.rstrip('Y'))))
        elif age:
            personals['age'] = age
        personals['sex']    = bids.get_dicomfield('PatientSex',    sourcefile)
        personals['size']   = bids.get_dicomfield('PatientSize',   sourcefile)
        personals['weight'] = bids.get_dicomfield('PatientWeight', sourcefile)
def bidsmapper(rawfolder: str, bidsfolder: str, bidsmapfile: str, templatefile: str, subprefix: str = 'sub-', sesprefix: str = 'ses-', store: bool = False, interactive: bool = True) -> None:
    """
    Main function that processes all the subjects and session in the sourcefolder and that generates a maximally filled-in
    bidsmap.yaml file in bidsfolder/code/bidscoin. Folders in sourcefolder are assumed to contain a single dataset.

    The resulting bidsmap is either handed over to the bidseditor GUI (interactive mode) or saved directly as
    bidsfolder/code/bidscoin/bidsmap.yaml (non-interactive mode, or when no subjects were found).

    :param rawfolder:       The root folder-name of the sub/ses/data/file tree containing the source data files
    :param bidsfolder:      The name of the BIDS root folder
    :param bidsmapfile:     The name of the bidsmap YAML-file
    :param templatefile:    The name of the bidsmap template YAML-file
    :param subprefix:       The prefix common for all source subject-folders
    :param sesprefix:       The prefix common for all source session-folders
    :param store:           If True, the provenance samples will be stored
    :param interactive:     If True, the user will be asked for help if an unknown run is encountered
    :return:                Nothing
    """

    # Input checking
    rawfolder      = Path(rawfolder).resolve()
    bidsfolder     = Path(bidsfolder).resolve()
    bidsmapfile    = Path(bidsmapfile)
    templatefile   = Path(templatefile)
    bidscoinfolder = bidsfolder / 'code' / 'bidscoin'

    # Start logging
    bids.setup_logging(bidscoinfolder / 'bidsmapper.log')
    LOGGER.info('')
    LOGGER.info('-------------- START BIDSmapper ------------')
    LOGGER.info(f">>> bidsmapper sourcefolder={rawfolder} bidsfolder={bidsfolder} bidsmap={bidsmapfile} "
                f" template={templatefile} subprefix={subprefix} sesprefix={sesprefix} store={store} interactive={interactive}")

    # Get the heuristics for filling the new bidsmap
    bidsmap_old, _ = bids.load_bidsmap(bidsmapfile, bidscoinfolder)
    template, _    = bids.load_bidsmap(templatefile, bidscoinfolder)

    # Create the new bidsmap as a copy / bidsmap skeleton with no modality entries (i.e. bidsmap with empty lists)
    if bidsmap_old:
        bidsmap_new = copy.deepcopy(bidsmap_old)
    else:
        bidsmap_new = copy.deepcopy(template)
    for logic in ('DICOM', 'PAR', 'P7', 'Nifti', 'FileSystem'):
        for modality in bids.bidsmodalities + (bids.unknownmodality, bids.ignoremodality):
            if bidsmap_new[logic] and modality in bidsmap_new[logic]:
                bidsmap_new[logic][modality] = None

    # Start with an empty skeleton if we didn't have an old bidsmap
    if not bidsmap_old:
        bidsmap_old = copy.deepcopy(bidsmap_new)

    # Start the Qt-application
    gui = interactive
    if gui:
        app = QApplication(sys.argv)
        app.setApplicationName('BIDS editor')
        mainwin = bidseditor.MainWindow()
        gui = bidseditor.Ui_MainWindow()
        gui.interactive = interactive
        gui.subprefix = subprefix
        gui.sesprefix = sesprefix

        if gui.interactive == 2:
            # NB: It is the rawfolder (and not the bidsfolder) that is scanned below
            QMessageBox.information(mainwin, 'BIDS mapping workflow',
                                    f"The bidsmapper will now scan {rawfolder} and whenever "
                                    f"it detects a new type of scan it will ask you to identify it.\n\n"
                                    f"It is important that you choose the correct BIDS modality "
                                    f"(e.g. 'anat', 'dwi' or 'func') and suffix (e.g. 'bold' or 'sbref').\n\n"
                                    f"At the end you will be shown an overview of all the "
                                    f"different scan types and BIDScoin options (as in the "
                                    f"bidseditor) that you can then (re)edit to your needs")

    # Loop over all subjects and sessions and built up the bidsmap entries
    dataformat = ''
    subjects   = bids.lsdirs(rawfolder, subprefix + '*')
    if not subjects:
        LOGGER.warning(f'No subjects found in: {rawfolder/subprefix}*')
        gui = None                          # Nothing to edit -> the (empty) bidsmap is saved without opening the editor
    for n, subject in enumerate(subjects, 1):

        sessions = bids.lsdirs(subject, sesprefix + '*')
        if not sessions:
            sessions = [subject]
        for session in sessions:

            # Unpack the data in a temporary folder if it is tarballed/zipped and/or contains a DICOMDIR file
            session, unpacked = bids.unpack(session, subprefix, sesprefix, '*')
            try:

                # Compose the provenance store of this session. NB: Use a session-local variable and leave the `store`
                # flag untouched, otherwise an unpacked session would switch on storing for all sessions that follow
                if unpacked:
                    provenance = dict(source=unpacked, target=bidscoinfolder / 'provenance')    # A store is always passed for unpacked data (its source folder is removed below)
                elif store:
                    provenance = dict(source=rawfolder, target=bidscoinfolder / 'provenance')
                else:
                    provenance = dict()

                # Loop of the different DICOM runs (series) and collect source files
                sourcefiles = []
                dataformat  = bids.get_dataformat(session)
                if not dataformat:
                    LOGGER.info(f"Skipping: {session} (subject {n}/{len(subjects)})")
                    continue

                LOGGER.info(f"Parsing: {session} (subject {n}/{len(subjects)})")

                if dataformat == 'DICOM':
                    for sourcedir in bids.lsdirs(session):
                        sourcefile = bids.get_dicomfile(sourcedir)
                        if sourcefile.name:
                            sourcefiles.append(sourcefile)

                if dataformat == 'PAR':
                    sourcefiles = bids.get_parfiles(session)

                if dataformat == 'P7':
                    sourcefiles = bids.get_p7file(session)

                # Update the bidsmap with the info from the source files
                for sourcefile in sourcefiles:
                    bidsmap_new = build_bidsmap(dataformat, sourcefile, bidsmap_new, bidsmap_old, template, provenance, gui)

                # Update / append the nifti mapping
                if dataformat == 'Nifti':
                    bidsmap_new = build_niftimap(session, bidsmap_new, bidsmap_old)

                # Update / append the file-system mapping
                if dataformat == 'FileSystem':
                    bidsmap_new = build_filesystemmap(session, bidsmap_new, bidsmap_old)

                # Update / append the plugin mapping
                if bidsmap_old['PlugIns']:
                    bidsmap_new = build_pluginmap(session, bidsmap_new, bidsmap_old)

            finally:
                # Clean-up the temporary unpacked data (also if the session was skipped or if the mapping failed)
                if unpacked:
                    shutil.rmtree(session)

    if not dataformat:
        LOGGER.warning('Could not determine the dataformat of the source data')

    # (Re)launch the bidseditor UI_MainWindow
    bidsmapfile = bidscoinfolder / 'bidsmap.yaml'
    if gui:
        if not dataformat:
            QMessageBox.information(mainwin, 'BIDS mapping workflow',
                                    'Could not determine the dataformat of the source data.\n'
                                    'You can try running the bidseditor tool yourself')
        else:
            QMessageBox.information(mainwin, 'BIDS mapping workflow',
                                    f"The bidsmapper has finished scanning {rawfolder}\n\n"
                                    f"Please carefully check all the different BIDS output names "
                                    f"and BIDScoin options and (re)edit them to your needs.\n\n"
                                    f"You can always redo this step later by re-running the "
                                    f"bidsmapper or by just running the bidseditor tool")

            LOGGER.info('Opening the bidseditor')
            gui.setupUi(mainwin, bidsfolder, bidsmapfile, bidsmap_new, copy.deepcopy(bidsmap_new), template, dataformat, subprefix=subprefix, sesprefix=sesprefix)
            mainwin.show()
            app.exec()

    else:
        # Save the bidsmap in the bidscoinfolder
        bids.save_bidsmap(bidsmapfile, bidsmap_new)

    LOGGER.info('-------------- FINISHED! -------------------')
    LOGGER.info('')

    bids.reporterrors()
def scanparticipant(dataformat: str, session: Path, personals: dict, subid: str, sesid: str) -> bool:
    """
    Extracts the personal data (age, sex, size and weight) of a participant from the header of the first
    source file that is found in the session and stores it in the personals dictionary

    :param dataformat:  The format of the raw input data, i.e. 'DICOM' or 'PAR' (other formats are reported as an error)
    :param session:     The full-path name of the subject/session source file/folder
    :param personals:   The dictionary with the personal information (updated in place)
    :param subid:       The subject-id from the bids-folder
    :param sesid:       The session-id from the bids-folder
    :return:            True if successful, False if the data format is unsupported, if no source data was found
                        or if personals already contains data from another session
    """

    # Get the (first) DICOM- or PAR/XML source file of the session
    sourcefile = Path()
    if dataformat == 'DICOM':
        sources = bids.lsdirs(session)
        for source in sources:
            sourcefile = bids.get_dicomfile(source)
            if sourcefile.name:
                break

    elif dataformat == 'PAR':
        sources = bids.get_parfiles(session)
        if sources:
            sourcefile = sources[0]

    else:
        LOGGER.error(f"Unsupported data format: {dataformat}\nPlease report this bug")
        return False

    if not sources:
        LOGGER.info(f"No data found for: {session}")
        return False

    # Collect personal data from a source header (PAR/XML does not contain personal info)
    if dataformat == 'DICOM' and sourcefile.name:
        personals['participant_id'] = subid
        if sesid:
            if 'session_id' not in personals:
                personals['session_id'] = sesid
            else:
                return False                # Only from the first session -> BIDS specification

        # Convert the age to (whole) years. NOTE(review): assumes get_dicomfield returns a str here -- confirm
        age = bids.get_dicomfield('PatientAge', sourcefile)     # A string of characters with one of the following formats: nnnD, nnnW, nnnM, nnnY
        for unit, units_per_year in (('D', 365.2524), ('W', 52.1775), ('M', 12), ('Y', 1)):
            if age.endswith(unit):
                try:
                    personals['age'] = str(int(float(age.rstrip(unit)) / units_per_year))
                except (ValueError, OverflowError):
                    # A malformed value (e.g. 'Y' or '03xY') should not abort the scan -> store it as it is
                    LOGGER.warning(f"Could not parse the PatientAge '{age}' in {sourcefile}")
                    personals['age'] = age
                break
        else:
            if age:                         # No known unit was found -> store the value as it is
                personals['age'] = age

        personals['sex']    = bids.get_dicomfield('PatientSex',    sourcefile)
        personals['size']   = bids.get_dicomfield('PatientSize',   sourcefile)
        personals['weight'] = bids.get_dicomfield('PatientWeight', sourcefile)

    return True