def built_dicommap(dicomfile: str, bidsmap: dict, heuristics: dict) -> dict:
    """
    All the logic to map dicomfields onto bids labels go into this function

    The bidsmodality (and, if present, the bidslabel sub-directory) is derived from the location of the dicomfile,
    i.e. samples/bidsmodality/[dirname/]dicomfile. The first heuristics series that matches is copied, its attributes
    are filled with the values read from the dicomfile, and the result is added to the bidsmap (unless an equal
    series is already there).

    :param dicomfile:   The full-path name of the source dicom-file
    :param bidsmap:     The bidsmap as we had it (updated in place and returned)
    :param heuristics:  Full BIDS heuristics data structure, with all options, BIDS labels and attributes, etc
    :return:            The bidsmap with new entries in it
    :raises ValueError: If the bidsmodality directory name is unknown or if no heuristics series matches the dicomfile
    """

    # Nothing to map without a source file. NB: Test this first, before the path is handed over to os.path
    if not dicomfile:
        return bidsmap

    # Get the bidsmodality and dirname (= bidslabel) from the pathname (samples/bidsmodality/[dirname/]dicomfile)
    dirname = os.path.basename(os.path.dirname(dicomfile))
    if dirname in bids.bidsmodalities:
        bidsmodality = dirname
    else:
        bidsmodality = os.path.basename(os.path.dirname(os.path.dirname(dicomfile)))

    # Input checks. NB: Only index the heuristics with the bidsmodality if it is there, so that an unknown
    # bidsmodality gives the informative ValueError below instead of a bare KeyError
    dicomheuristics = heuristics['DICOM']
    if not dicomheuristics:
        return bidsmap
    if bidsmodality in dicomheuristics and not dicomheuristics[bidsmodality]:
        return bidsmap
    if bidsmodality not in bids.bidsmodalities:
        raise ValueError("Don't know what to do with this bidsmodality directory name: {}\n{}".format(bidsmodality, dicomfile))

    # Copy the bids-labels over from the matching series in heuristics to series_, Then fill the attributes and append it to bidsmap
    for series in dicomheuristics[bidsmodality]:

        # Find out if this series matches the dicomfile location
        if bidsmodality == 'beh':
            # beh should not have subdirectories as it (in the current BIDS version) doesn't have a suffix; however,
            # it is kind of irrelevant as beh probably never has a dicomfile anyway?
            match = True
        else:
            # NB: modality_label & suffix are more or less the same thing, but perhaps future versions will make a distinction
            bidslabels = series['bids']
            match = ('modality_label' in bidslabels and dirname == bidslabels['modality_label']) or \
                    ('suffix' in bidslabels and dirname == bidslabels['suffix'])
        if not match:
            continue

        # Creating a new object is safe in that we don't change the original heuristics object. However, we lose all
        # comments and formatting within the series (which is not such a disaster probably). It is also much faster
        # and more robust with aliases compared with a deepcopy
        # series_ = dict()                                      # The CommentedMap API below is not guaranteed for the future so keep this line as an alternative
        series_ = ruamel.yaml.comments.CommentedMap()
        series_['bids'] = series['bids']

        # Fill the empty attribute with the info from the dicomfile
        # series_['attributes'] = dict()                        # The CommentedMap API below is not guaranteed for the future so keep this line as an alternative
        series_['attributes'] = ruamel.yaml.comments.CommentedMap()                         # Clear the yaml objects that were copied over
        series_.yaml_add_eol_comment('From: ' + dicomfile, key='attributes', column=50)     # Add provenance data
        series_['provenance'] = dicomfile
        for attrkey in series['attributes']:
            series_['attributes'][attrkey] = bids.get_dicomfield(attrkey, dicomfile)

        # Copy the filled-in series over to the bidsmap
        if bidsmap['DICOM'][bidsmodality] is None:
            bidsmap['DICOM'][bidsmodality] = [series_]
        elif not bids.exist_series(series_, bidsmap['DICOM'][bidsmodality]):
            bidsmap['DICOM'][bidsmodality].append(series_)

        return bidsmap

    raise ValueError(f"Oops, this should not happen! BIDS modality '{bidsmodality}' or one of the bidslabels is not accounted for in the code\n{dicomfile}")
def built_dicommap(dicomfile, bidsmap, heuristics):
    """
    All the logic to map dicomfields onto bids labels go into this function

    The bidsmodality (and, if present, the bidslabel sub-directory) is derived from the location of the dicomfile,
    i.e. samples/bidsmodality/[dirname/]dicomfile. The first heuristics series that matches is copied, its attributes
    are filled with the values read from the dicomfile, and the result is added to the bidsmap (unless an equal
    series is already there).

    :param str dicomfile:   The full-path name of the source dicom-file
    :param dict bidsmap:    The bidsmap as we had it (updated in place and returned)
    :param dict heuristics: Full BIDS heuristics data structure, with all options, BIDS labels and attributes, etc
    :return:                The bidsmap with new entries in it
    :rtype: dict
    :raises ValueError:     If the bidsmodality directory name is unknown or if no heuristics series matches the dicomfile
    """

    # Nothing to map without a source file. NB: Test this first, before the path is handed over to os.path
    if not dicomfile:
        return bidsmap

    # Get the bidsmodality and dirname (= bidslabel) from the pathname (samples/bidsmodality/[dirname/]dicomfile)
    dirname = os.path.basename(os.path.dirname(dicomfile))
    if dirname in bids.bidsmodalities:
        bidsmodality = dirname
    else:
        bidsmodality = os.path.basename(os.path.dirname(os.path.dirname(dicomfile)))

    # Input checks. NB: Only index the heuristics with the bidsmodality if it is there, so that an unknown
    # bidsmodality gives the informative ValueError below instead of a bare KeyError
    dicomheuristics = heuristics['DICOM']
    if not dicomheuristics:
        return bidsmap
    if bidsmodality in dicomheuristics and not dicomheuristics[bidsmodality]:
        return bidsmap
    if bidsmodality not in bids.bidsmodalities:
        raise ValueError("Don't know what to do with this bidsmodality directory name: {}\n{}".format(bidsmodality, dicomfile))

    # Copy the bids-labels over from the matching series in heuristics to series_, Then fill the attributes and append it to bidsmap
    for series in dicomheuristics[bidsmodality]:

        # Find out if this series matches the dicomfile location
        if bidsmodality == 'beh':
            # beh should not have subdirectories as it (in the current BIDS version) doesn't have a suffix
            match = True
        else:
            match = ('modality_label' in series and dirname == series['modality_label']) or \
                    ('suffix' in series and dirname == series['suffix'])
        if not match:
            continue

        # Creating a new (shallow-copied) object is safe in that we don't change the original heuristics object.
        # However, we lose all comments and formatting within the series (which is not such a disaster probably).
        # It is also much faster and more robust with aliases compared with a deepcopy
        series_ = dict(series)

        # Replace the copied attributes with a fresh dict that holds the info from the dicomfile
        series_['attributes'] = {attrkey: bids.get_dicomfield(attrkey, dicomfile) for attrkey in series['attributes']}

        # Copy the filled-in series over to the bidsmap
        if bidsmap['DICOM'][bidsmodality] is None:
            bidsmap['DICOM'][bidsmodality] = [series_]
        elif not bids.exist_series(series_, bidsmap['DICOM'][bidsmodality]):
            bidsmap['DICOM'][bidsmodality].append(series_)

        return bidsmap

    raise ValueError("Oops, this should not happen! BIDS modality '{}' or one of the bidslabels is not accounted for in the code\n{}".format(bidsmodality, dicomfile))
def get_attribute(dataformat: str, sourcefile: Path, attribute: str, options: dict) -> Union[str, int]:
    """
    Plugin entry point for reading a single attribute value from a DICOM or PAR source file

    :param dataformat:  The bidsmap-dataformat of the sourcefile, e.g. DICOM or PAR
    :param sourcefile:  The sourcefile from which the attribute value should be read
    :param attribute:   The attribute key for which the value should be read
    :param options:     A dictionary with the plugin options, e.g. taken from the bidsmap['Options'] (not used here)
    :return:            The attribute value (None if the dataformat is neither DICOM nor PAR)
    """

    value = None                    # Dataformats other than DICOM and PAR are not handled by this plugin

    if dataformat == 'DICOM':
        value = bids.get_dicomfield(attribute, sourcefile)
    elif dataformat == 'PAR':
        value = bids.get_parfield(attribute, sourcefile)

    return value
def _strip_prefix(label: str, prefix: str) -> str:
    """Returns the label without its leading prefix (NB: str.lstrip() strips a set of characters, not a prefix)"""
    if prefix and label.startswith(prefix):
        return label[len(prefix):]
    return label


def rawmapper(rawfolder: str, outfolder: str='', sessions: list=[], rename: bool=False, dicomfield: tuple=('PatientComments',), wildcard: str='*', subprefix: str='sub-', sesprefix: str='ses-', dryrun: bool=False) -> None:
    """
    Maps DICOM field values onto the sub/ses folders in the rawfolder, and either saves this mapping in a
    rawmapper_<dicomfield>.tsv file or uses it to rename the sub/ses folders (and logs the renaming in that file)

    :param rawfolder:   The root folder-name of the sub/ses/data/file tree containing the source data files
    :param outfolder:   The name of the folder where the mapping-file is saved (default = rawfolder)
    :param sessions:    Space separated list of selected sub-#/ses-# names / folders to be processed. Otherwise all sessions in the bidsfolder will be selected (NB: the default list is never modified in place)
    :param rename:      Flag for renaming the sub-subid folders to sub-dicomfield
    :param dicomfield:  The names of the dicomfields that are mapped (/ renamed to sub-dcmval/ses-dcmval)
    :param wildcard:    The Unix style pathname pattern expansion that is used by glob to select the series from which the dicomfield is being mapped
    :param subprefix:   The prefix common for all source subject-folders
    :param sesprefix:   The prefix common for all source session-folders
    :param dryrun:      Flag for dry-running renaming the sub-subid folders
    :return:            Nothing
    :raises FileExistsError: If the mapper-file already exists when not renaming (the file is opened in exclusive 'x' mode)
    """

    # Input checking
    if not outfolder:
        outfolder = rawfolder
    rawfolder = os.path.abspath(os.path.expanduser(rawfolder))
    outfolder = os.path.abspath(os.path.expanduser(outfolder))

    # Create or append the output to a mapper logfile
    mapperfile = os.path.join(outfolder, 'rawmapper_{}.tsv'.format('_'.join(dicomfield)))
    if not dryrun:
        if rename:
            # The rename-log is appended to over multiple runs, so write the header only once
            if not os.path.isfile(mapperfile) or os.path.getsize(mapperfile) == 0:
                with open(mapperfile, 'a') as fid:
                    fid.write('{}\t{}\t{}\t{}\n'.format('subid', 'sesid', 'newsubid', 'newsesid'))
        else:
            with open(mapperfile, 'x') as fid:
                fid.write('{}\t{}\t{}\t{}\n'.format('subid', 'sesid', 'seriesname', '\t'.join(dicomfield)))

    # Map the sessions in the rawfolder
    if not sessions:
        sessions = glob.glob(os.path.join(rawfolder, f'{subprefix}*{os.sep}{sesprefix}*'))
        if not sessions:
            sessions = glob.glob(os.path.join(rawfolder, f'{subprefix}*'))        # Try without session-subfolders
    else:
        sessions = [sessionitem for session in sessions for sessionitem in glob.glob(os.path.join(rawfolder, session), recursive=True)]

    # Loop over the selected sessions in the rawfolder
    for session in sessions:

        # Get the subject and session identifiers from the raw folder. The session part is searched for behind the
        # subject folder only, and is left empty for session-less datasets
        subpart, sesfound, sespart = session.rsplit(os.sep + subprefix, 1)[1].partition(os.sep + sesprefix)
        subid = subprefix + subpart
        sesid = sesprefix + sespart if sesfound else ''

        # Parse the new subject and session identifiers from the dicomfield
        series = bids.lsdirs(session, wildcard)
        if not series:
            series = ''
            dcmval = ''
        else:
            series = series[0]                                                    # TODO: loop over series?
            dicomfile = bids.get_dicomfile(series)
            dcmval = '/'.join(str(bids.get_dicomfield(dcmfield, dicomfile)) for dcmfield in dicomfield)

        # Rename the session subfolder in the rawfolder and print & save this info
        if rename:

            # Get the new subid and sesid
            if not dcmval or dcmval == 'None':
                warnings.warn('Skipping renaming because the dicom-field was empty for: ' + session)
                continue
            if '/' in dcmval:               # Allow for different sub/ses delimiters that could be entered at the console (i.e. in PatientComments)
                delim = '/'
            elif '\\' in dcmval:
                delim = '\\'
            else:
                delim = '\r\n'
            newsubsesid = [val for val in dcmval.split(delim) if val]             # Skip empty lines / entries
            if not newsubsesid:                                                   # E.g. when dcmval only contains delimiters
                warnings.warn('Skipping renaming because the dicom-field was empty for: ' + session)
                continue
            newsubid = subprefix + bids.cleanup_label(_strip_prefix(newsubsesid[0], subprefix))
            if newsubid == subprefix or newsubid == subprefix + 'None':
                newsubid = subid
                warnings.warn('Could not rename {} because the dicom-field was empty for: {}'.format(subid, session))
            if len(newsubsesid) == 1:
                newsesid = sesid
            elif len(newsubsesid) == 2:
                newsesid = sesprefix + bids.cleanup_label(_strip_prefix(newsubsesid[1], sesprefix))
            else:
                warnings.warn('Skipping renaming of {} because the dicom-field "{}" could not be parsed into [subid, sesid]'.format(session, dcmval))
                continue
            if newsesid == sesprefix or newsesid == sesprefix + 'None':
                newsesid = sesid
                warnings.warn('Could not rename {} because the dicom-field was empty for: {}'.format(sesid, session))

            # Save the dicomfield / sub-ses mapping to disk and rename the session subfolder (but skip if it already exists)
            if newsesid:
                newsession = os.path.join(rawfolder, newsubid, newsesid)
            else:
                newsession = os.path.join(rawfolder, newsubid)                    # Avoid a trailing path separator for session-less data
            print(session + ' -> ' + newsession)
            if newsession == session:
                continue
            if newsession.startswith(session + os.sep):                           # A folder cannot be moved into itself
                warnings.warn('Skipping renaming of {} because {} is located inside it'.format(session, newsession))
            elif os.path.isdir(newsession):
                warnings.warn('{} already exists, skipping renaming of {}'.format(newsession, session))
            elif not dryrun:
                with open(mapperfile, 'a') as fid:
                    fid.write('{}\t{}\t{}\t{}\n'.format(subid, sesid, newsubid, newsesid))
                os.renames(session, newsession)

        # Print & save the dicom values
        else:
            seriesname = os.path.basename(series)
            dcmvalues = '\t'.join(dcmval.split('/'))
            print('{}{}{}\t-> {}'.format(subid + os.sep, sesid + os.sep if sesid else '', seriesname, dcmvalues))
            if not dryrun:
                with open(mapperfile, 'a') as fid:
                    fid.write('{}\t{}\t{}\t{}\n'.format(subid, sesid, seriesname, dcmvalues))
def rawmapper(rawfolder, outfolder=None, rename=False, dicomfield=('PatientComments', ), wildcard='*', dryrun=False):
    """
    Maps DICOM field values onto the sub-*/ses-* folders in the rawfolder, and either saves this mapping in a
    rawmapper_<dicomfield>.tsv file or uses it to rename the sub/ses folders (and logs the renaming in that file)

    :param str rawfolder:   The root folder-name of the sub/ses/data/file tree containing the source data files
    :param str outfolder:   The name of the folder where the mapping-file is saved (default = rawfolder)
    :param bool rename:     Flag for renaming the sub-subid folders to sub-dicomfield
    :param list dicomfield: The names of the dicomfields that are mapped (/ renamed to sub-dcmval/ses-dcmval)
    :param str wildcard:    The Unix style pathname pattern expansion that is used by glob to select the series from which the dicomfield is being mapped
    :param bool dryrun:     Flag for dry-running renaming the sub-subid folders
    :return:                Nothing
    :rtype: NoneType
    :raises FileExistsError: If the mapper-file already exists when not renaming (the file is opened in exclusive 'x' mode)
    """

    # Input checking
    if not outfolder:
        outfolder = rawfolder
    rawfolder = os.path.abspath(os.path.expanduser(rawfolder))
    outfolder = os.path.abspath(os.path.expanduser(outfolder))

    # Create a output mapper-file
    mapperfile = os.path.join(outfolder, 'rawmapper_{}.tsv'.format('_'.join(dicomfield)))
    if not dryrun:
        if rename:
            # The rename-log is appended to over multiple runs, so write the header only once
            if not os.path.isfile(mapperfile) or os.path.getsize(mapperfile) == 0:
                with open(mapperfile, 'a') as fid:
                    fid.write('{}\t{}\t{}\t{}\n'.format('subid', 'sesid', 'newsubid', 'newsesid'))
        else:
            with open(mapperfile, 'x') as fid:
                fid.write('{}\t{}\t{}\t{}\n'.format('subid', 'sesid', 'seriesname', '\t'.join(dicomfield)))

    # Loop over all subjects and sessions in the rawfolder
    for subject in bids.lsdirs(rawfolder, 'sub-*'):

        sessions = bids.lsdirs(subject, 'ses-*')
        if not sessions:
            sessions = [subject]            # NB: Must be a list, otherwise the loop below iterates over the characters of the path

        for session in sessions:

            # Get the subject and session identifiers from the raw folder. The session part is searched for behind
            # the subject folder only, and is left empty for session-less subjects
            subpart, sesfound, sespart = session.rsplit(os.sep + 'sub-', 1)[1].partition(os.sep + 'ses-')
            subid = 'sub-' + subpart
            sesid = 'ses-' + sespart if sesfound else ''

            # Parse the new subject and session identifiers from the dicomfield
            series = bids.lsdirs(session, wildcard)
            if not series:
                series = ''
                dcmval = ''
            else:
                series = series[0]                                              # TODO: loop over series?
                dicomfile = bids.get_dicomfile(series)
                dcmval = '/'.join(str(bids.get_dicomfield(dcmfield, dicomfile)) for dcmfield in dicomfield)     # TODO: test how newlines from the console work out

            # Rename the session subfolder in the rawfolder and print & save this info
            if rename:

                # Get the new subid and sesid
                if not dcmval or dcmval == 'None':
                    warnings.warn('Skipping renaming because the dicom-field was empty for: ' + session)
                    continue
                if '/' in dcmval:           # Allow for different sub/ses delimiters that could be entered at the console (i.e. in PatientComments)
                    delim = '/'
                elif '\\' in dcmval:
                    delim = '\\'
                else:
                    delim = '\n'
                newsubsesid = dcmval.split(delim)
                newsubid = 'sub-' + bids.cleanup_label(newsubsesid[0].replace('sub-', ''))
                if newsubid == 'sub-' or newsubid == 'sub-None':
                    newsubid = subid
                    warnings.warn('Could not rename {} because the dicom-field was empty for: {}'.format(subid, session))
                if len(newsubsesid) == 1:
                    newsesid = sesid
                elif len(newsubsesid) == 2:
                    newsesid = 'ses-' + bids.cleanup_label(newsubsesid[1].replace('ses-', ''))
                else:
                    warnings.warn('Skipping renaming of {} because the dicom-field "{}" could not be parsed into [subid, sesid]'.format(session, dcmval))
                    continue
                if newsesid == 'ses-' or newsesid == 'ses-None':
                    newsesid = sesid
                    warnings.warn('Could not rename {} because the dicom-field was empty for: {}'.format(sesid, session))

                # Save the dicomfield / sub-ses mapping to disk and rename the session subfolder (but skip if it already exists)
                if newsesid:
                    newsession = os.path.join(rawfolder, newsubid, newsesid)
                else:
                    newsession = os.path.join(rawfolder, newsubid)              # Avoid a trailing path separator for session-less data
                print(session + ' -> ' + newsession)
                if newsession == session:
                    continue
                if newsession.startswith(session + os.sep):                     # A folder cannot be moved into itself
                    warnings.warn('Skipping renaming of {} because {} is located inside it'.format(session, newsession))
                elif os.path.isdir(newsession):
                    warnings.warn('{} already exists, skipping renaming of {}'.format(newsession, session))
                elif not dryrun:
                    with open(mapperfile, 'a') as fid:
                        fid.write('{}\t{}\t{}\t{}\n'.format(subid, sesid, newsubid, newsesid))
                    os.renames(session, newsession)

            # Print & save the dicom values
            else:
                seriesname = os.path.basename(series)
                dcmvalues = '\t'.join(dcmval.split('/'))
                print('{}{}{}\t-> {}'.format(subid + os.sep, sesid + os.sep if sesid else '', seriesname, dcmvalues))
                if not dryrun:
                    with open(mapperfile, 'a') as fid:
                        fid.write('{}\t{}\t{}\t{}\n'.format(subid, sesid, seriesname, dcmvalues))
def sortsession(sessionfolder: Path, dicomfiles: list, dicomfield: str, rename: bool, ext: str, nosort: bool, dryrun: bool) -> None:
    """
    Sorts dicomfiles into (3-digit) SeriesNumber-SeriesDescription subfolders (e.g. '003-T1MPRAGE')

    :param sessionfolder:   The name of the destination folder of the dicom files
    :param dicomfiles:      The list of dicomfiles to be sorted and/or renamed
    :param dicomfield:      The dicomfield that is used to construct the series folder name (e.g. SeriesDescription or ProtocolName, which are both used as fallback)
    :param rename:          Boolean to rename the DICOM files to a PatientName_SeriesNumber_SeriesDescription_AcquisitionNumber_InstanceNumber scheme
    :param ext:             The file extension after sorting (empty value keeps original file extension)
    :param nosort:          Boolean to skip sorting of DICOM files into SeriesNumber-SeriesDescription directories (useful in combination with -r for renaming only)
    :param dryrun:          Boolean to just display the action
    :return:                Nothing
    """

    # Map all dicomfiles and move them to series folders
    LOGGER.info(f">> Sorting: {sessionfolder} ({len(dicomfiles)} files)")
    if not dryrun:
        sessionfolder.mkdir(parents=True, exist_ok=True)

    seriesdirs = set()                      # The series folders that have been dealt with already
    for dicomfile in dicomfiles:

        # Extract the SeriesDescription and SeriesNumber from the dicomfield
        seriesnr = bids.get_dicomfield('SeriesNumber', dicomfile)
        if not seriesnr:
            LOGGER.warning(f"No SeriesNumber found, skipping: {dicomfile}")          # This is not a normal DICOM file, better not do anything with it
            continue
        seriesdescr = bids.get_dicomfield(dicomfield, dicomfile)
        if not seriesdescr:
            seriesdescr = bids.get_dicomfield('SeriesDescription', dicomfile)
        if not seriesdescr:
            seriesdescr = bids.get_dicomfield('ProtocolName', dicomfile)
        if not seriesdescr:
            seriesdescr = 'unknown_protocol'
            LOGGER.warning(f"No {dicomfield}, SeriesDecription or ProtocolName found for: {dicomfile}")
        if rename:
            acquisitionnr = bids.get_dicomfield('AcquisitionNumber', dicomfile)
            instancenr = bids.get_dicomfield('InstanceNumber', dicomfile)
            if not instancenr:
                instancenr = bids.get_dicomfield('ImageNumber', dicomfile)           # This Attribute was named Image Number in earlier versions of this Standard
            patientname = bids.get_dicomfield('PatientName', dicomfile)
            if not patientname:
                patientname = bids.get_dicomfield('PatientsName', dicomfile)         # This Attribute was/is sometimes called PatientsName?

        # Move and/or rename the dicomfile in(to) the (series sub)folder
        if rename and not (patientname and seriesnr and seriesdescr and acquisitionnr and instancenr):
            LOGGER.warning(f"Missing one or more essential DICOM-fields, cannot safely rename {dicomfile}\n"
                           f"patientname = {patientname}\n"
                           f"seriesnumber = {seriesnr}\n"
                           f"{dicomfield} = {seriesdescr}\n"
                           f"acquisitionnr = {acquisitionnr}\n"
                           f"instancenr = {instancenr}")
            filename = dicomfile.name
        elif rename:
            filename = cleanup(f"{patientname}_{seriesnr:03d}_{seriesdescr}_{acquisitionnr:05d}_{instancenr:05d}{ext}")
        else:
            filename = dicomfile.name
        if nosort:
            pathname = sessionfolder
        else:
            # Create the series subfolder
            seriesdir = cleanup(f"{seriesnr:03d}-{seriesdescr}")
            if seriesdir not in seriesdirs:                                          # We have a new series
                if not (sessionfolder / seriesdir).is_dir():
                    LOGGER.info(f" Creating: {sessionfolder/seriesdir}")
                    if not dryrun:
                        (sessionfolder / seriesdir).mkdir(parents=True)
                seriesdirs.add(seriesdir)
            pathname = sessionfolder / seriesdir
        if ext:
            newfilename = (pathname / filename).with_suffix(ext)
        else:
            newfilename = pathname / filename
        if newfilename.is_file():
            # The file may already be where it should be (e.g. with nosort and without renaming, or when re-running);
            # in that case there is no name-clash and nothing left to do
            if newfilename.resolve() == dicomfile.resolve():
                continue
            LOGGER.warning(f"File already exists: {dicomfile} -> {newfilename}")
            newfilename = newfilename.with_name(newfilename.stem + str(uuid.uuid4()) + newfilename.suffix)
            LOGGER.info(f"Using new file-name: {dicomfile} -> {newfilename}")
        if not dryrun:
            dicomfile.replace(newfilename)
def coin_data2bids(dataformat: str, session: Path, bidsmap: dict, bidsfolder: Path, personals: dict, subprefix: str, sesprefix: str) -> None: """ Converts the session source-files into BIDS-valid nifti-files in the corresponding bidsfolder and extracts personals (e.g. Age, Sex) from the source header :param dataformat: The format of the raw input data that is to be coined (e.g. 'DICOM' or 'PAR', see bids.get_dataformat) :param session: The full-path name of the subject/session source file/folder :param bidsmap: The full mapping heuristics from the bidsmap YAML-file :param bidsfolder: The full-path name of the BIDS root-folder :param personals: The dictionary with the personal information :param subprefix: The prefix common for all source subject-folders :param sesprefix: The prefix common for all source session-folders :return: Nothing """ # Get valid BIDS subject/session identifiers from the (first) DICOM- or PAR/XML source file if dataformat == 'DICOM': sourcefile = Path() sources = bids.lsdirs(session) for source in sources: sourcefile = bids.get_dicomfile(source) manufacturer = bids.get_dicomfield('Manufacturer', sourcefile) if sourcefile.name: break elif dataformat == 'PAR': sources = bids.get_parfiles(session) manufacturer = 'Philips Medical Systems' if sources: sourcefile = sources[0] else: LOGGER.error( f"Unsupported data format: {dataformat}\nPlease report this bug") return if not sources: LOGGER.info(f"No data found for: {session}") return subid, sesid = bids.get_subid_sesid(sourcefile, bidsmap[dataformat]['subject'], bidsmap[dataformat]['session'], subprefix, sesprefix) if subid == subprefix: LOGGER.error(f"No valid subject identifier found for: {session}") return # Create the BIDS session-folder and a scans.tsv file bidsses = bidsfolder / subid / sesid if bidsses.is_dir(): LOGGER.warning( f"Existing BIDS output-directory found, which may result in duplicate data (with increased run-index). 
Make sure {bidsses} was cleaned-up from old data before (re)running the bidscoiner" ) bidsses.mkdir(parents=True, exist_ok=True) scans_tsv = bidsses / f"{subid}{bids.add_prefix('_',sesid)}_scans.tsv" if scans_tsv.is_file(): scans_table = pd.read_csv(scans_tsv, sep='\t', index_col='filename') else: scans_table = pd.DataFrame(columns=['acq_time'], dtype='str') scans_table.index.name = 'filename' # Process all the source files or run subfolders for source in sources: # Get a source-file if dataformat == 'DICOM': sourcefile = bids.get_dicomfile(source) elif dataformat == 'PAR': sourcefile = source if not sourcefile.name: continue # Get a matching run from the bidsmap run, datatype, index = bids.get_matching_run(sourcefile, bidsmap, dataformat) # Check if we should ignore this run if datatype == bids.ignoredatatype: LOGGER.info(f"Leaving out: {source}") continue # Check if we already know this run if index is None: LOGGER.error( f"Skipping unknown '{datatype}' run: {sourcefile}\n-> Re-run the bidsmapper and delete {bidsses} to solve this warning" ) continue LOGGER.info(f"Processing: {source}") # Create the BIDS session/datatype output folder if run['bids']['suffix'] in bids.get_derivatives(datatype): outfolder = bidsfolder / 'derivatives' / manufacturer.replace( ' ', '') / subid / sesid / datatype else: outfolder = bidsses / datatype outfolder.mkdir(parents=True, exist_ok=True) # Compose the BIDS filename using the matched run bidsname = bids.get_bidsname(subid, sesid, run) runindex = run['bids'].get('run', '') if runindex.startswith('<<') and runindex.endswith('>>'): bidsname = bids.increment_runindex(outfolder, bidsname) jsonfiles = [ (outfolder / bidsname).with_suffix('.json') ] # List -> Collect the associated json-files (for updating them later) -- possibly > 1 # Check if file already exists (-> e.g. 
when a static runindex is used) if (outfolder / bidsname).with_suffix('.json').is_file(): LOGGER.warning( f"{outfolder/bidsname}.* already exists and will be deleted -- check your results carefully!" ) for ext in ('.nii.gz', '.nii', '.json', '.bval', '.bvec', 'tsv.gz'): (outfolder / bidsname).with_suffix(ext).unlink(missing_ok=True) # Convert physiological log files (dcm2niix can't handle these) if run['bids']['suffix'] == 'physio': if bids.get_dicomfile(source, 2).name: LOGGER.warning( f"Found > 1 DICOM file in {source}, using: {sourcefile}") physiodata = physio.readphysio(sourcefile) physio.physio2tsv(physiodata, outfolder / bidsname) # Convert the source-files in the run folder to nifti's in the BIDS-folder else: command = '{path}dcm2niix {args} -f "{filename}" -o "{outfolder}" "{source}"'.format( path=bidsmap['Options']['dcm2niix']['path'], args=bidsmap['Options']['dcm2niix']['args'], filename=bidsname, outfolder=outfolder, source=source) if not bids.run_command(command): continue # Replace uncropped output image with the cropped one if '-x y' in bidsmap['Options']['dcm2niix']['args']: for dcm2niixfile in sorted( outfolder.glob(bidsname + '*_Crop_*')): # e.g. *_Crop_1.nii.gz ext = ''.join(dcm2niixfile.suffixes) newbidsfile = str(dcm2niixfile).rsplit(ext, 1)[0].rsplit( '_Crop_', 1)[0] + ext LOGGER.info( f"Found dcm2niix _Crop_ postfix, replacing original file\n{dcm2niixfile} ->\n{newbidsfile}" ) dcm2niixfile.replace(newbidsfile) # Rename all files that got additional postfixes from dcm2niix. 
See: https://github.com/rordenlab/dcm2niix/blob/master/FILENAMING.md dcm2niixpostfixes = ('_c', '_i', '_Eq', '_real', '_imaginary', '_MoCo', '_t', '_Tilt', '_e', '_ph') dcm2niixfiles = sorted( set([ dcm2niixfile for dcm2niixpostfix in dcm2niixpostfixes for dcm2niixfile in outfolder.glob( f"{bidsname}*{dcm2niixpostfix}*") ])) for dcm2niixfile in dcm2niixfiles: ext = ''.join(dcm2niixfile.suffixes) postfixes = str(dcm2niixfile).split(bidsname)[1].rsplit( ext)[0].split('_')[1:] newbidsname = dcm2niixfile.name # Strip the additional postfixes and assign them to bids entities in the for-loop below for postfix in postfixes: # dcm2niix postfixes _c%d, _e%d and _ph (and any combination of these in that order) are for multi-coil data, multi-echo data and phase data # Patch the echo entity in the newbidsname with the dcm2niix echo info # NB: We can't rely on the bids-entity info here because manufacturers can e.g. put multiple echos in one series / run-folder if postfix[0] == 'e' and bids.get_bidsvalue( newbidsname, 'echo' ): # NB: Check if postfix[0]=='e' uniquely refers to the right dcm2niixpostfix echonr = f"_{postfix}" # E.g. echonr='_e1' or echonr='_pha' for dcm2niixpostfix in dcm2niixpostfixes: echonr = echonr.replace( dcm2niixpostfix, '' ) # Strip the dcm2niixpostfix to keep the echonr info. E.g. [echonr='_e1' or echonr='_pha'] -> [echonr='1' or echonr='a'] if echonr.isalpha(): echonr = ord( echonr ) - 95 # dcm2niix adds an alphabetically ordered character if it outputs more than one image with the same name. Convert character to echo-number: '' -> 1, 'a'->2, etc elif not echonr: echonr = 1 newbidsname = bids.get_bidsvalue( newbidsname, 'echo', str(echonr) ) # In contrast to other labels, run and echo labels MUST be integers. 
Those labels MAY include zero padding, but this is NOT RECOMMENDED to maintain their uniqueness # Patch fieldmap images (NB: datatype=='fmap' is too broad, see the fmap.yaml file) elif run['bids']['suffix'] in ('magnitude', 'magnitude1', 'magnitude2', 'phase1', 'phase2', 'phasediff', 'fieldmap'): if len(dcm2niixfiles) not in ( 0, 2, 4, 6, 8 ): # Phase / echo data may be stored in the same data source / run folder LOGGER.warning( f"Unknown fieldmap {outfolder/bidsname} for '{postfix}'" ) newbidsname = newbidsname.replace( '_fieldmap_ph', '_fieldmap') newbidsname = newbidsname.replace( '_magnitude_e1', '_magnitude') newbidsname = newbidsname.replace( '_magnitude_ph', '_fieldmap') newbidsname = newbidsname.replace( '_magnitude1_e1', '_magnitude1') newbidsname = newbidsname.replace( '_magnitude2_e1', '_magnitude1' ) # This can happen when the e2 image is stored in the same directory as the e1 image, but with the e2 listed first newbidsname = newbidsname.replace( '_magnitude1_e2', '_magnitude2') if len(dcm2niixfiles) == 8: newbidsname = newbidsname.replace( '_magnitude1_ph', '_phase1' ) # Two magnitude + 2 phase images in one folder / datasource else: newbidsname = newbidsname.replace( '_magnitude1_ph', '_phasediff' ) # One or two magnitude + 1 phasediff image newbidsname = newbidsname.replace( '_magnitude1a', '_magnitude2') newbidsname = newbidsname.replace( '_magnitude1_pha', '_phase2') newbidsname = newbidsname.replace( '_magnitude2_e2', '_magnitude2') newbidsname = newbidsname.replace( '_magnitude2_ph', '_phase2') newbidsname = newbidsname.replace( '_phase1_e1', '_phase1') newbidsname = newbidsname.replace( '_phase2_e1', '_phase1' ) # This can happen when the e2 image is stored in the same directory as the e1 image, but with the e2 listed first newbidsname = newbidsname.replace( '_phase1_ph', '_phase1') newbidsname = newbidsname.replace( '_phase1_e2', '_phase2') newbidsname = newbidsname.replace( '_phase2_e2', '_phase2') newbidsname = newbidsname.replace( 
'_phase2_ph', '_phase2') # Append the dcm2niix info to acq-label, may need to be improved / elaborated for future BIDS standards, supporting multi-coil data else: newbidsname = bids.get_bidsvalue( newbidsname, 'dummy', postfix) # Remove the added postfix from the new bidsname newbidsname = newbidsname.replace(f"_{postfix}_", '_') # If it is not last newbidsname = newbidsname.replace(f"_{postfix}.", '.') # If it is last # Save the file with a new name if runindex.startswith('<<') and runindex.endswith('>>'): newbidsname = bids.increment_runindex( outfolder, newbidsname, '' ) # Update the runindex now that the acq-label has changed newbidsfile = outfolder / newbidsname LOGGER.info( f"Found dcm2niix {postfixes} postfixes, renaming\n{dcm2niixfile} ->\n{newbidsfile}" ) if newbidsfile.is_file(): LOGGER.warning( f"Overwriting existing {newbidsfile} file -- check your results carefully!" ) dcm2niixfile.replace(newbidsfile) if ext == '.json': oldjsonfile = (outfolder / bidsname).with_suffix('.json') if oldjsonfile in jsonfiles and not oldjsonfile.is_file(): jsonfiles.remove( (outfolder / bidsname).with_suffix('.json')) jsonfiles.append(newbidsfile) # Loop over and adapt all the newly produced json files and write to the scans.tsv file (every nifti-file comes with a json-file) for jsonfile in sorted(set(jsonfiles)): # Check if dcm2niix behaved as expected if not jsonfile.is_file(): LOGGER.error( f"Unexpected file conversion result: {jsonfile} not found") continue # Add a dummy b0 bval- and bvec-file for any file without a bval/bvec file (e.g. 
sbref, b0 scans) if datatype == 'dwi': bvecfile = jsonfile.with_suffix('.bvec') bvalfile = jsonfile.with_suffix('.bval') if not bvecfile.is_file(): LOGGER.info(f"Adding dummy bvec file: {bvecfile}") with bvecfile.open('w') as bvec_fid: bvec_fid.write('0\n0\n0\n') if not bvalfile.is_file(): LOGGER.info(f"Adding dummy bval file: {bvalfile}") with bvalfile.open('w') as bval_fid: bval_fid.write('0\n') # Add the TaskName to the func json-file elif datatype == 'func': with jsonfile.open('r') as json_fid: data = json.load(json_fid) if not 'TaskName' in data: LOGGER.info(f"Adding TaskName to: {jsonfile}") data['TaskName'] = run['bids']['task'] with jsonfile.open('w') as json_fid: json.dump(data, json_fid, indent=4) # Parse the acquisition time from the json file or else from the source header (NB: assuming the source file represents the first acquisition) niifile = list( jsonfile.parent.glob(jsonfile.stem + '.nii*') ) # Find the corresponding nifti file (there should be only one, let's not make assumptions about the .gz extension) if niifile and datatype not in bidsmap['Options']['bidscoin'][ 'bidsignore'] and not run['bids'][ 'suffix'] in bids.get_derivatives(datatype): with jsonfile.open('r') as json_fid: data = json.load(json_fid) if 'AcquisitionTime' not in data or not data['AcquisitionTime']: data['AcquisitionTime'] = bids.get_sourcefield( 'AcquisitionTime', sourcefile) # DICOM if not data['AcquisitionTime']: data['AcquisitionTime'] = bids.get_sourcefield( 'exam_date', sourcefile) # PAR/XML try: acq_time = dateutil.parser.parse(data['AcquisitionTime']) except: LOGGER.warning( f"Could not parse the acquisition time from: '{data['AcquisitionTime']}' in {sourcefile}" ) acq_time = dateutil.parser.parse('00:00:00') scanpath = niifile[0].relative_to(bidsses) scans_table.loc[ scanpath.as_posix(), 'acq_time'] = '1925-01-01T' + acq_time.strftime('%H:%M:%S') # Write the scans_table to disk LOGGER.info(f"Writing acquisition time data to: {scans_tsv}") 
scans_table.sort_values(by=['acq_time', 'filename'], inplace=True) scans_table.to_csv(scans_tsv, sep='\t', encoding='utf-8') # Add IntendedFor and TE1+TE2 meta-data to the fieldmap json-files. This has been postponed untill all datatypes have been processed (i.e. so that all target images are indeed on disk) if bidsmap[dataformat]['fmap'] is not None: for fieldmap in bidsmap[dataformat]['fmap']: bidsname = bids.get_bidsname(subid, sesid, fieldmap) niifiles = [] intendedfor = fieldmap['bids']['IntendedFor'] # Search for the imaging files that match the IntendedFor search criteria if intendedfor: if intendedfor.startswith('<<') and intendedfor.endswith('>>'): intendedfor = intendedfor[2:-2].split('><') elif not isinstance(intendedfor, list): intendedfor = [intendedfor] for selector in intendedfor: niifiles.extend( [ Path(niifile).relative_to(bidsfolder / subid) for niifile in sorted( bidsses.rglob(f"*{selector}*.nii*")) if selector ] ) # Search in all runs using a relative path to the subject folder else: intendedfor = [] # Get the set of json-files (account for multiple runs in one data source and dcm2niix postfixes inserted into the acquisition label) jsonfiles = [] acqlabel = bids.get_bidsvalue(bidsname, 'acq') patterns = (bidsname.replace('_run-1_', '_run-[0-9]*_').replace( '_magnitude1', '_magnitude*').replace('_magnitude2', '_magnitude*').replace( '_phase1', '_phase*').replace('_phase2', '_phase*'), bidsname.replace('_run-1_', '_run-[0-9]*_').replace( '_magnitude1', '_phase*').replace('_magnitude2', '_phase*')) for pattern in patterns: jsonfiles.extend((bidsses / 'fmap').glob(pattern + '.json')) if acqlabel: cepattern = bids.get_bidsvalue(pattern, 'acq', acqlabel + '[CE][0-9]*') jsonfiles.extend( list((bidsses / 'fmap').glob(cepattern + '.json'))) # Save the meta-data in the jsonfiles for jsonfile in sorted(set(jsonfiles)): # Add the IntendedFor data with jsonfile.open('r') as json_fid: data = json.load(json_fid) if 'IntendedFor' not in data: if niifiles: 
LOGGER.info(f"Adding IntendedFor to: {jsonfile}") elif intendedfor: LOGGER.warning( f"Empty 'IntendedFor' fieldmap value in {jsonfile}: the search for {intendedfor} gave no results" ) else: LOGGER.warning( f"Empty 'IntendedFor' fieldmap value in {jsonfile}: the IntendedFor value of the bidsmap entry was empty" ) data['IntendedFor'] = [ niifile.as_posix() for niifile in niifiles ] # The path needs to use forward slashes instead of backward slashes with jsonfile.open('w') as json_fid: json.dump(data, json_fid, indent=4) # Extract the echo times from magnitude1 and magnitude2 and add them to the phasediff json-file if jsonfile.name.endswith('phasediff.json'): json_magnitude = [None, None] TE = [None, None] for n in (0, 1): json_magnitude[ n] = jsonfile.parent / jsonfile.name.replace( '_phasediff', f"_magnitude{n+1}") if not json_magnitude[n].is_file(): LOGGER.error( f"Could not find expected magnitude{n+1} image associated with: {jsonfile}" ) else: with json_magnitude[n].open('r') as json_fid: data = json.load(json_fid) TE[n] = data['EchoTime'] if None in TE: LOGGER.error( f"Cannot find and add valid EchoTime1={TE[0]} and EchoTime2={TE[1]} data to: {jsonfile}" ) elif TE[0] > TE[1]: LOGGER.error( f"Found invalid EchoTime1={TE[0]} > EchoTime2={TE[1]} for: {jsonfile}" ) else: with jsonfile.open('r') as json_fid: data = json.load(json_fid) data['EchoTime1'] = TE[0] data['EchoTime2'] = TE[1] LOGGER.info( f"Adding EchoTime1: {TE[0]} and EchoTime2: {TE[1]} to {jsonfile}" ) with jsonfile.open('w') as json_fid: json.dump(data, json_fid, indent=4) # Collect personal data from a source header (PAR/XML does not contain personal info) if dataformat == 'DICOM' and sourcefile.name: personals['participant_id'] = subid if sesid: if 'session_id' not in personals: personals['session_id'] = sesid else: return # Only take data from the first session -> BIDS specification age = bids.get_dicomfield( 'PatientAge', sourcefile ) # A string of characters with one of the following formats: nnnD, 
nnnW, nnnM, nnnY if age.endswith('D'): personals['age'] = str(int(float(age.rstrip('D')) / 365.2524)) elif age.endswith('W'): personals['age'] = str(int(float(age.rstrip('W')) / 52.1775)) elif age.endswith('M'): personals['age'] = str(int(float(age.rstrip('M')) / 12)) elif age.endswith('Y'): personals['age'] = str(int(float(age.rstrip('Y')))) elif age: personals['age'] = age personals['sex'] = bids.get_dicomfield('PatientSex', sourcefile) personals['size'] = bids.get_dicomfield('PatientSize', sourcefile) personals['weight'] = bids.get_dicomfield('PatientWeight', sourcefile)
def coin_dicom(session: str, bidsmap: dict, bidsfolder: str, personals: dict, subprefix: str, sesprefix: str) -> None:
    """
    Converts the session dicom-files into BIDS-valid nifti-files in the corresponding bidsfolder and
    extracts personals (e.g. Age, Sex) from the dicom header

    The function (1) derives the subject/session identifiers, (2) runs dcm2niix on every series
    subfolder, (3) renames the dcm2niix output files that carry a _c/_e/_ph/_i postfix, (4) patches
    the json sidecar files and collects the acquisition times in the scans.tsv file, (5) adds the
    IntendedFor data to the fieldmap json-files and (6) stores personal data in `personals`

    :param session:     The full-path name of the subject/session source folder
    :param bidsmap:     The full mapping heuristics from the bidsmap YAML-file
    :param bidsfolder:  The full-path name of the BIDS root-folder
    :param personals:   The dictionary with the personal information (updated in place)
    :param subprefix:   The prefix common for all source subject-folders
    :param sesprefix:   The prefix common for all source session-folders
    :return:            Nothing
    """

    def _strip_prefix(label: str, prefix: str) -> str:
        """Removes `prefix` once from the start of `label` (str.lstrip would strip a character set instead)"""
        if prefix and label.startswith(prefix):
            return label[len(prefix):]
        return label

    if not bids.lsdirs(session):
        logger.warning('No series subfolder(s) found in: ' + session)
        return

    TE = [None, None]       # The echo times of magnitude1 and magnitude2; these are shared between the series of this session

    # Get a valid BIDS subject identifier from the (first) dicom-header or from the session source folder
    participant_label = bidsmap['DICOM']['participant_label']
    if participant_label and participant_label.startswith('<<') and participant_label.endswith('>>'):
        subid = bids.get_dicomfield(participant_label[2:-2], bids.get_dicomfile(bids.lsdirs(session)[0]))
    elif participant_label:
        subid = participant_label
    else:
        subid = session.rsplit(os.sep + subprefix, 1)[1].split(os.sep + sesprefix, 1)[0]
    subid = 'sub-' + bids.cleanup_label(_strip_prefix(subid, subprefix))
    if subid == 'sub-':     # i.e. the label is empty. NB: Comparing with subprefix only works if subprefix happens to be 'sub-'
        logger.error('Error: No valid subject identifier found for: ' + session)
        return

    # Get a valid or empty BIDS session identifier from the (first) dicom-header or from the session source folder
    session_label = bidsmap['DICOM']['session_label']
    if session_label and session_label.startswith('<<') and session_label.endswith('>>'):
        sesid = bids.get_dicomfield(session_label[2:-2], bids.get_dicomfile(bids.lsdirs(session)[0]))
    elif session_label:
        sesid = session_label
    elif os.sep + sesprefix in session:
        sesid = session.rsplit(os.sep + sesprefix, 1)[1]    # maxsplit=1 -> always take the part after the last prefix
    else:
        sesid = ''
    if sesid:
        sesid = 'ses-' + bids.cleanup_label(_strip_prefix(sesid, sesprefix))

    # Create the BIDS session-folder and a scans.tsv file
    bidsses = os.path.join(bidsfolder, subid, sesid)        # NB: This gives a trailing '/' if ses=='', but that should be ok
    os.makedirs(bidsses, exist_ok=True)
    scans_tsv = os.path.join(bidsses, f'{subid}{bids.add_prefix("_",sesid)}_scans.tsv')
    if os.path.exists(scans_tsv):
        scans_table = pd.read_csv(scans_tsv, sep='\t', index_col='filename')
    else:
        scans_table = pd.DataFrame(columns=['acq_time'], dtype='str')
        scans_table.index.name = 'filename'

    # Process all the dicom series subfolders
    for series in bids.lsdirs(session):

        # NOTE(review): this tests the string as returned by bids.lsdirs; if that is a full path then hidden folders are not caught -- confirm
        if series.startswith('.'):
            logger.info('Ignoring hidden dicom-folder: ' + series)
            continue
        else:
            logger.info('Processing dicom-folder: ' + series)

        # Get the cleaned-up bids labels from a dicom-file and bidsmap
        dicomfile = bids.get_dicomfile(series)
        if not dicomfile:
            continue
        result   = bids.get_matching_dicomseries(dicomfile, bidsmap)
        series_  = result['series']
        modality = result['modality']

        # Create the BIDS session/modality folder
        bidsmodality = os.path.join(bidsses, modality)
        os.makedirs(bidsmodality, exist_ok=True)

        # Compose the BIDS filename using the bids labels and run-index
        runindex   = series_['bids']['run_index']
        dynamicrun = runindex.startswith('<<') and runindex.endswith('>>')
        if dynamicrun:
            bidsname = bids.get_bidsname(subid, sesid, modality, series_, runindex[2:-2])
            bidsname = bids.increment_runindex(bidsmodality, bidsname)
        else:
            bidsname = bids.get_bidsname(subid, sesid, modality, series_, runindex)

        # Convert the dicom-files in the series folder to nifti's in the BIDS-folder
        # SECURITY NOTE: the command is a shell-string that is built from bidsmap options and path names; only use trusted bidsmaps
        command = '{path}dcm2niix {args} -f "{filename}" -o "{outfolder}" "{infolder}"'.format(
            path      = bidsmap['Options']['dcm2niix']['path'],
            args      = bidsmap['Options']['dcm2niix']['args'],
            filename  = bidsname,
            outfolder = bidsmodality,
            infolder  = series)
        logger.info('$ ' + command)
        process = subprocess.run(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)     # TODO: investigate shell=False and capture_output=True
        logger.info(process.stdout.decode('utf-8'))
        if process.returncode != 0:
            errormsg = f'Error: Failed to process {series} (errorcode {process.returncode})'
            logger.error(errormsg)
            continue

        # Replace uncropped output image with the cropped one
        if '-x y' in bidsmap['Options']['dcm2niix']['args']:
            for filename in sorted(glob.glob(os.path.join(bidsmodality, bidsname + '*_Crop_*'))):     # e.g. *_Crop_1.nii.gz
                basepath, ext1 = os.path.splitext(filename)
                basepath, ext2 = os.path.splitext(basepath)                                           # Account for .nii.gz files
                basepath       = basepath.rsplit('_Crop_', 1)[0]
                newfilename    = basepath + ext2 + ext1
                logger.info(f'Found dcm2niix _Crop_ suffix, replacing original file\n{filename} ->\n{newfilename}')
                os.replace(filename, newfilename)

        # Rename all files ending with _c%d, _e%d and _ph (and any combination of these): These are produced by dcm2niix for multi-coil data, multi-echo data and phase data, respectively
        jsonfiles = []      # Collect the associated json-files (for updating them later) -- possibly > 1
        for suffix in ('_c', '_e', '_ph', '_i'):
            for filename in sorted(glob.glob(os.path.join(bidsmodality, bidsname + suffix + '[0-9]*'))):
                basepath, ext1  = os.path.splitext(filename)
                basepath, ext2  = os.path.splitext(basepath)                # Account for .nii.gz files
                basepath, index = basepath.rsplit(suffix, 1)
                index           = index.split('_')[0].zfill(2)              # Zero padd as specified in the BIDS-standard (assuming two digits is sufficient); strip following suffices (fieldmaps produce *_e2_ph files)

                # This is a special hack: dcm2niix does not always add a _c/_e suffix for the first(?) coil/echo image -> add it when we encounter a **_e2/_c2 file
                if suffix in ('_c', '_e') and int(index) == 2 and basepath.rsplit('_', 1)[1] != 'magnitude1':   # For fieldmaps: *_magnitude1_e[index] -> *_magnitude[index] (This is handled below)
                    filename_ce = basepath + ext2 + ext1                    # The file without the _c1/_e1 suffix
                    if suffix == '_e' and bids.set_bidslabel(basepath, 'echo'):
                        newbasepath_ce = bids.set_bidslabel(basepath, 'echo', '1')
                    else:
                        newbasepath_ce = bids.set_bidslabel(basepath, 'dummy', suffix.upper() + '1'.zfill(len(index)))     # --> append to acq-label, may need to be elaborated for future BIDS standards, supporting multi-coil data
                    newfilename_ce = newbasepath_ce + ext2 + ext1           # The file as it should have been
                    if os.path.isfile(filename_ce):
                        if filename_ce != newfilename_ce:
                            logger.info(f'Found no dcm2niix {suffix} suffix for image instance 1, renaming\n{filename_ce} ->\n{newfilename_ce}')
                            os.rename(filename_ce, newfilename_ce)
                        if ext1 == '.json':
                            jsonfiles.append(newbasepath_ce + '.json')

                # Patch the basepath with the suffix info
                if suffix == '_e' and bids.set_bidslabel(basepath, 'echo') and index:
                    basepath = bids.set_bidslabel(basepath, 'echo', str(int(index)))        # In contrast to other labels, run and echo labels MUST be integers. Those labels MAY include zero padding, but this is NOT RECOMMENDED to maintain their uniqueness

                elif suffix == '_e' and basepath.rsplit('_', 1)[1] in ('magnitude1', 'magnitude2') and index:    # i.e. modality == 'fmap'
                    basepath = basepath[0:-1] + str(int(index))                             # basepath: *_magnitude1_e[index] -> *_magnitude[index]
                    # Read the echo times that need to be added to the json-file (see below)
                    if os.path.splitext(filename)[1] == '.json':
                        with open(filename, 'r') as json_fid:
                            data = json.load(json_fid)
                        TE[int(index) - 1] = data['EchoTime']
                        logger.info(f"Reading EchoTime{index} = {data['EchoTime']} from: {filename}")

                elif suffix == '_e' and basepath.rsplit('_', 1)[1] == 'phasediff' and index:                     # i.e. modality == 'fmap'
                    pass

                elif suffix == '_ph' and basepath.rsplit('_', 1)[1] in ['phase1', 'phase2'] and index:           # i.e. modality == 'fmap' (TODO: untested)
                    basepath = basepath[0:-1] + str(int(index))                             # basepath: *_phase1_e[index] -> *_phase[index]
                    logger.warning('Untested dcm2niix "_ph"-filetype: ' + basepath)

                else:
                    basepath = bids.set_bidslabel(basepath, 'dummy', suffix.upper() + index)     # --> append to acq-label, may need to be elaborated for future BIDS standards, supporting multi-coil data

                # Save the file with a new name
                if dynamicrun:
                    newbidsname = bids.increment_runindex(bidsmodality, os.path.basename(basepath), ext2 + ext1)    # Update the runindex now that the acq-label has changed
                else:
                    newbidsname = os.path.basename(basepath)
                newfilename = os.path.join(bidsmodality, newbidsname + ext2 + ext1)
                logger.info(f'Found dcm2niix {suffix} suffix, renaming\n{filename} ->\n{newfilename}')
                os.rename(filename, newfilename)
                if ext1 == '.json':
                    jsonfiles.append(os.path.join(bidsmodality, newbidsname + '.json'))

        # Loop over and adapt all the newly produced json files and write to the scans.tsv file (every nifti-file comes with a json-file)
        if not jsonfiles:
            jsonfiles = [os.path.join(bidsmodality, bidsname + '.json')]
        for jsonfile in set(jsonfiles):

            # Check if dcm2niix behaved as expected
            if not os.path.isfile(jsonfile):
                logger.warning(f'Unexpected file conversion result: {jsonfile} not found')
                continue

            # Add a dummy b0 bval- and bvec-file for any file without a bval/bvec file (e.g. sbref, b0 scans)
            if modality == 'dwi':
                bvecfile = os.path.splitext(jsonfile)[0] + '.bvec'
                bvalfile = os.path.splitext(jsonfile)[0] + '.bval'
                if not os.path.isfile(bvecfile):
                    logger.info('Adding dummy bvec file: ' + bvecfile)
                    with open(bvecfile, 'w') as bvec_fid:
                        bvec_fid.write('0\n0\n0\n')
                if not os.path.isfile(bvalfile):
                    logger.info('Adding dummy bval file: ' + bvalfile)
                    with open(bvalfile, 'w') as bval_fid:
                        bval_fid.write('0\n')

            # Add the TaskName to the func json-file
            elif modality == 'func':
                with open(jsonfile, 'r') as json_fid:
                    data = json.load(json_fid)
                if 'TaskName' not in data:
                    logger.info('Adding TaskName to: ' + jsonfile)
                    data['TaskName'] = series_['bids']['task_label']
                    with open(jsonfile, 'w') as json_fid:
                        json.dump(data, json_fid, indent=4)

            # Add the EchoTime(s) used to create the difference image to the fmap json-file. NB: This assumes the magnitude series have already been parsed (i.e. their nifti's had an _e suffix) -- This is normally the case for Siemens (phase-series being saved after the magnitude series
            elif modality == 'fmap':
                if series_['bids']['suffix'] == 'phasediff':
                    logger.info('Adding EchoTime1 and EchoTime2 to: ' + jsonfile)
                    with open(jsonfile, 'r') as json_fid:
                        data = json.load(json_fid)
                    data['EchoTime1'] = TE[0]
                    data['EchoTime2'] = TE[1]
                    with open(jsonfile, 'w') as json_fid:
                        json.dump(data, json_fid, indent=4)
                    if TE[0] is None or TE[1] is None:      # Comparing None values raises a TypeError
                        logger.warning('Missing Echo-Time data for: ' + jsonfile)
                    elif TE[0] > TE[1]:
                        logger.warning('EchoTime1 > EchoTime2 in: ' + jsonfile)

            # Parse the acquisition time from the json file or else from the dicom header
            with open(jsonfile, 'r') as json_fid:
                data = json.load(json_fid)
            if 'AcquisitionTime' not in data or not data['AcquisitionTime']:
                data['AcquisitionTime'] = bids.get_dicomfield('AcquisitionTime', dicomfile)
            try:
                acq_time = dateutil.parser.parse(data['AcquisitionTime'])
            except (ValueError, OverflowError, TypeError):
                logger.warning(f"Could not parse the acquisition time from: '{data['AcquisitionTime']}' in {dicomfile}")
                acq_time = dateutil.parser.parse('00:00:00')

            # Find the corresponding nifti file (there should be only one, let's not make assumptions about the .gz extension)
            niipaths = glob.glob(os.path.splitext(jsonfile)[0] + '.nii*')
            if not niipaths:
                logger.warning(f'No nifti-file found for: {jsonfile}')
                continue
            # Use a forward-slashed path relative to the session folder. NB: a plain str.replace(bidsses + os.sep, '') fails if bidsses has a trailing separator (i.e. if there is no session)
            niipath = os.path.relpath(niipaths[0], bidsses).replace(os.sep, '/')
            scans_table.loc[niipath, 'acq_time'] = '1900-01-01T' + acq_time.strftime('%H:%M:%S')

    # Write the scans_table to disk
    logger.info('Writing acquisition time data to: ' + scans_tsv)
    scans_table.to_csv(scans_tsv, sep='\t', encoding='utf-8')

    # Search for the IntendedFor images and add them to the json-files. This has been postponed untill all modalities have been processed (i.e. so that all target images are indeed on disk)
    if bidsmap['DICOM']['fmap'] is not None:
        for fieldmap in bidsmap['DICOM']['fmap']:
            if not ('IntendedFor' in fieldmap['bids'] and fieldmap['bids']['IntendedFor']):
                continue

            # Collect the fieldmap json-files: account for multiple runs and for dcm2niix suffixes inserted into the acquisition label
            bidsname   = bids.get_bidsname(subid, sesid, 'fmap', fieldmap, '1')
            acqlabel   = bids.set_bidslabel(bidsname, 'acq')
            runpattern = bidsname.replace('_run-1_', '_run-[0-9]*_')
            patterns   = [runpattern]
            if acqlabel:                                    # NB: str.replace('', ..) would insert the wildcard between all characters
                patterns.append(runpattern.replace(acqlabel, acqlabel + '[CE][0-9]*'))
            fmapjsons = set()
            for pattern in patterns:
                fmapjsons.update(glob.glob(os.path.join(bidsses, 'fmap', pattern + '.json')))

            # Search in all series for the target images, using a relative path (this does not depend on the json-file)
            intendedfor = fieldmap['bids']['IntendedFor']
            if intendedfor.startswith('<<') and intendedfor.endswith('>>'):
                intendedfor = intendedfor[2:-2].split('><')
            else:
                intendedfor = [intendedfor]
            niifiles = []
            for selector in intendedfor:
                niifiles.extend([niifile.split(os.sep + subid + os.sep, 1)[1].replace('\\', '/')
                                 for niifile in sorted(glob.glob(os.path.join(bidsses, f'**{os.sep}*{selector}*.nii*')))])

            for jsonfile in sorted(fmapjsons):

                with open(jsonfile, 'r') as json_fid:
                    data = json.load(json_fid)
                data['IntendedFor'] = niifiles
                logger.info('Adding IntendedFor to: ' + jsonfile)
                with open(jsonfile, 'w') as json_fid:
                    json.dump(data, json_fid, indent=4)

                # Catch magnitude2 files produced by dcm2niix (i.e. magnitude1 & magnitude2 both in the same seriesfolder)
                if jsonfile.endswith('magnitude1.json'):
                    jsonfile2 = jsonfile.rsplit('1.json', 1)[0] + '2.json'
                    if os.path.isfile(jsonfile2):
                        with open(jsonfile2, 'r') as json_fid:
                            data = json.load(json_fid)
                        if 'IntendedFor' not in data:
                            data['IntendedFor'] = niifiles
                            logger.info('Adding IntendedFor to: ' + jsonfile2)
                            with open(jsonfile2, 'w') as json_fid:
                                json.dump(data, json_fid, indent=4)

    # Collect personal data from the DICOM header (of the last series folder)
    dicomfile = bids.get_dicomfile(series)
    personals['participant_id'] = subid
    if sesid:
        personals['session_id'] = sesid
    personals['age']    = bids.get_dicomfield('PatientAge',    dicomfile)
    personals['sex']    = bids.get_dicomfield('PatientSex',    dicomfile)
    personals['size']   = bids.get_dicomfield('PatientSize',   dicomfile)
    personals['weight'] = bids.get_dicomfield('PatientWeight', dicomfile)
def coin_dicom(session: Path, bidsmap: dict, bidsfolder: Path, personals: dict, subprefix: str, sesprefix: str) -> None:
    """
    Converts the session dicom-files into BIDS-valid nifti-files in the corresponding bidsfolder and
    extracts personals (e.g. Age, Sex) from the dicom header

    The function (1) derives the subject/session identifiers, (2) runs dcm2niix on every run
    subfolder that has a known bidsmap entry, (3) renames the dcm2niix output files that carry a
    _c/_e/_ph/_i postfix, (4) patches the json sidecar files and collects the acquisition times in
    the scans.tsv file, (5) adds the IntendedFor data to the fieldmap json-files and (6) stores
    personal data of the first session in `personals`

    :param session:     The full-path name of the subject/session source folder
    :param bidsmap:     The full mapping heuristics from the bidsmap YAML-file
    :param bidsfolder:  The full-path name of the BIDS root-folder
    :param personals:   The dictionary with the personal information (updated in place)
    :param subprefix:   The prefix common for all source subject-folders
    :param sesprefix:   The prefix common for all source session-folders
    :return:            Nothing
    """

    if not bids.lsdirs(session):
        LOGGER.warning(f"No run subfolder(s) found in: {session}")
        return

    TE = [None, None]       # The echo times of magnitude1 and magnitude2; these are shared between the runs of this session

    # Get valid BIDS subject/session identifiers from the (first) dicom-header or from the session source folder
    subid, sesid = bids.get_subid_sesid(bids.get_dicomfile(bids.lsdirs(session)[0]),
                                        bidsmap['DICOM']['subject'],
                                        bidsmap['DICOM']['session'],
                                        subprefix, sesprefix)
    # NOTE(review): presumably get_subid_sesid returns the bare prefix if no label was found -- verify against its implementation
    if subid == subprefix:
        LOGGER.error(f"No valid subject identifier found for: {session}")
        return

    # Create the BIDS session-folder and a scans.tsv file
    bidsses = bidsfolder / subid / sesid
    if bidsses.is_dir():
        LOGGER.warning(f"Existing BIDS output-directory found, which may result in duplicate data (with increased run-index). Make sure {bidsses} was cleaned-up from old data before (re)running the bidscoiner")
    bidsses.mkdir(parents=True, exist_ok=True)
    scans_tsv = bidsses / f"{subid}{bids.add_prefix('_',sesid)}_scans.tsv"
    if scans_tsv.is_file():
        scans_table = pd.read_csv(scans_tsv, sep='\t', index_col='filename')
    else:
        scans_table = pd.DataFrame(columns=['acq_time'], dtype='str')
        scans_table.index.name = 'filename'

    # Process all the dicom run subfolders
    for runfolder in bids.lsdirs(session):

        # Get a dicom-file
        dicomfile = bids.get_dicomfile(runfolder)
        if not dicomfile.name:
            continue

        # Get a matching run from the bidsmap
        run, modality, index = bids.get_matching_run(dicomfile, bidsmap)

        # Check if we should ignore this run
        if modality == bids.ignoremodality:
            LOGGER.info(f"Leaving out: {runfolder}")
            continue

        # Check if we already know this run
        if index is None:
            LOGGER.warning(f"Skipping unknown '{modality}': {dicomfile}\n-> re-run the bidsmapper and delete {session} to solve this warning")
            continue

        LOGGER.info(f"Processing: {runfolder}")

        # Create the BIDS session/modality folder
        bidsmodality = bidsses / modality
        bidsmodality.mkdir(parents=True, exist_ok=True)

        # Compose the BIDS filename using the matched run
        bidsname   = bids.get_bidsname(subid, sesid, modality, run)
        runindex   = run['bids']['run']
        dynamicrun = runindex.startswith('<<') and runindex.endswith('>>')
        if dynamicrun:
            bidsname = bids.increment_runindex(bidsmodality, bidsname)

        # Check if file already exists (-> e.g. when a static runindex is used)
        if (bidsmodality / bidsname).with_suffix('.json').is_file():
            LOGGER.warning(f"{bidsmodality/bidsname}.* already exists -- check your results carefully!")

        # Convert the dicom-files in the run folder to nifti's in the BIDS-folder
        command = '{path}dcm2niix {args} -f "{filename}" -o "{outfolder}" "{infolder}"'.format(
            path      = bidsmap['Options']['dcm2niix']['path'],
            args      = bidsmap['Options']['dcm2niix']['args'],
            filename  = bidsname,
            outfolder = bidsmodality,
            infolder  = runfolder)
        if not bids.run_command(command):
            continue

        # Replace uncropped output image with the cropped one
        if '-x y' in bidsmap['Options']['dcm2niix']['args']:
            for filename in sorted(bidsmodality.glob(bidsname + '*_Crop_*')):      # e.g. *_Crop_1.nii.gz
                ext         = ''.join(filename.suffixes)
                newfilename = str(filename).rsplit(ext, 1)[0].rsplit('_Crop_', 1)[0] + ext
                LOGGER.info(f"Found dcm2niix _Crop_ suffix, replacing original file\n{filename} ->\n{newfilename}")
                filename.replace(newfilename)

        # Rename all files ending with _c%d, _e%d and _ph (and any combination of these): These are produced by dcm2niix for multi-coil data, multi-echo data and phase data, respectively
        jsonfiles = []      # Collect the associated json-files (for updating them later) -- possibly > 1
        for dcm2niisuffix in ('_c', '_e', '_ph', '_i'):
            for filename in sorted(bidsmodality.glob(bidsname + dcm2niisuffix + '*')):
                ext             = ''.join(filename.suffixes)
                basepath, index = str(filename).rsplit(ext, 1)[0].rsplit(dcm2niisuffix, 1)     # basepath = the name without the added stuff (i.e. bidsmodality/bidsname), index = added dcm2niix index (e.g. _c1 -> index=1)
                basesuffix      = basepath.rsplit('_', 1)[1]                                   # The BIDS suffix, e.g. basepath = *_magnitude1 -> basesuffix=magnitude1
                index           = index.split('_')[0].zfill(2)                                 # Zero padd as specified in the BIDS-standard (assuming two digits is sufficient); strip following suffices (fieldmaps produce *_e2_ph files)

                # This is a special hack: dcm2niix does not always add a _c/_e suffix for the first(?) coil/echo image -> add it when we encounter a **_e2/_c2 file
                if dcm2niisuffix in ('_c', '_e') and int(index) == 2 and basesuffix not in ['magnitude1', 'phase1']:    # For fieldmaps: *_magnitude1_e[index] -> *_magnitude[index] (This is handled below)
                    filename_ce = Path(basepath + ext)                                         # The file without the _c1/_e1 suffix
                    if dcm2niisuffix == '_e' and bids.get_bidsvalue(basepath, 'echo'):
                        newbasepath_ce = Path(bids.get_bidsvalue(basepath, 'echo', '1'))
                    else:
                        newbasepath_ce = Path(bids.get_bidsvalue(basepath, 'dummy', dcm2niisuffix.upper() + '1'.zfill(len(index))))    # --> append to acq-label, may need to be elaborated for future BIDS standards, supporting multi-coil data
                    newfilename_ce = newbasepath_ce.with_suffix(ext)                           # The file as it should have been
                    if filename_ce.is_file():
                        if filename_ce != newfilename_ce:
                            LOGGER.info(f"Found no dcm2niix {dcm2niisuffix} suffix for image instance 1, renaming\n{filename_ce} ->\n{newfilename_ce}")
                            filename_ce.replace(newfilename_ce)
                        if ext == '.json':
                            jsonfiles.append(newbasepath_ce.with_suffix('.json'))

                # Patch the basepath with the dcm2niix suffix info (we can't rely on the basepath info here because Siemens can e.g. put multiple echos in one series / run-folder)
                if dcm2niisuffix == '_e' and bids.get_bidsvalue(basepath, 'echo') and index:
                    basepath = bids.get_bidsvalue(basepath, 'echo', str(int(index)))           # In contrast to other labels, run and echo labels MUST be integers. Those labels MAY include zero padding, but this is NOT RECOMMENDED to maintain their uniqueness

                elif dcm2niisuffix == '_e' and basesuffix in ('magnitude1', 'magnitude2') and index:    # i.e. modality == 'fmap'
                    basepath = basepath[0:-1] + str(int(index))                                # basepath: *_magnitude1_e[index] -> *_magnitude[index]
                    # Collect the echo times that need to be added to the json-file (see below)
                    if filename.suffix == '.json':
                        with filename.open('r') as json_fid:
                            data = json.load(json_fid)
                        TE[int(index) - 1] = data['EchoTime']
                        LOGGER.info(f"Collected EchoTime{index} = {data['EchoTime']} from: {filename}")

                elif dcm2niisuffix == '_e' and basesuffix == 'phasediff' and index:            # i.e. modality == 'fmap'
                    pass

                elif dcm2niisuffix == '_e' and basesuffix in ['phase1', 'phase2'] and index:   # i.e. modality == 'fmap'
                    basepath = basepath[0:-1] + str(int(index))                                # basepath: *_phase1_e[index]_ph -> *_phase[index]

                else:
                    basepath = bids.get_bidsvalue(basepath, 'dummy', dcm2niisuffix.upper() + index)     # --> append to acq-label, may need to be elaborated for future BIDS standards, supporting multi-coil data

                # Save the file with a new name
                newbidsname = str(Path(basepath).name)
                if dynamicrun:
                    newbidsname = bids.increment_runindex(bidsmodality, newbidsname, ext)      # Update the runindex now that the acq-label has changed
                newfilename = (bidsmodality / newbidsname).with_suffix(ext)
                LOGGER.info(f"Found dcm2niix {dcm2niisuffix} suffix, renaming\n{filename} ->\n{newfilename}")
                filename.replace(newfilename)
                if ext == '.json':
                    jsonfiles.append((bidsmodality / newbidsname).with_suffix('.json'))

        # Loop over and adapt all the newly produced json files and write to the scans.tsv file (every nifti-file comes with a json-file)
        if not jsonfiles:
            jsonfiles = [(bidsmodality / bidsname).with_suffix('.json')]
        for jsonfile in set(jsonfiles):

            # Check if dcm2niix behaved as expected
            if not jsonfile.is_file():
                LOGGER.error(f"Unexpected file conversion result: {jsonfile} not found")
                continue

            # Add a dummy b0 bval- and bvec-file for any file without a bval/bvec file (e.g. sbref, b0 scans)
            if modality == 'dwi':
                bvecfile = jsonfile.with_suffix('.bvec')
                bvalfile = jsonfile.with_suffix('.bval')
                if not bvecfile.is_file():
                    LOGGER.info(f"Adding dummy bvec file: {bvecfile}")
                    with bvecfile.open('w') as bvec_fid:
                        bvec_fid.write('0\n0\n0\n')
                if not bvalfile.is_file():
                    LOGGER.info(f"Adding dummy bval file: {bvalfile}")
                    with bvalfile.open('w') as bval_fid:
                        bval_fid.write('0\n')

            # Add the TaskName to the func json-file
            elif modality == 'func':
                with jsonfile.open('r') as json_fid:
                    data = json.load(json_fid)
                if 'TaskName' not in data:
                    LOGGER.info(f"Adding TaskName to: {jsonfile}")
                    data['TaskName'] = run['bids']['task']
                    with jsonfile.open('w') as json_fid:
                        json.dump(data, json_fid, indent=4)

            # Add the EchoTime(s) used to create the difference image to the fmap json-file. NB: This assumes the magnitude runs have already been parsed (i.e. their nifti's had an _e suffix) -- This is normally the case for Siemens (phase-runs being saved after the magnitude runs
            elif modality == 'fmap':
                if run['bids']['suffix'] == 'phasediff':
                    LOGGER.info(f"Adding EchoTime1: {TE[0]} and EchoTime2: {TE[1]} to {jsonfile}")
                    if TE[0] is None or TE[1] is None:
                        LOGGER.warning(f"Missing Echo-Time data for: {jsonfile}")
                    elif TE[0] > TE[1]:
                        LOGGER.warning(f"EchoTime1 > EchoTime2 for: {jsonfile}")
                    with jsonfile.open('r') as json_fid:
                        data = json.load(json_fid)
                    data['EchoTime1'] = TE[0]
                    data['EchoTime2'] = TE[1]
                    with jsonfile.open('w') as json_fid:
                        json.dump(data, json_fid, indent=4)

            # Parse the acquisition time from the json file or else from the dicom header (NB: assuming the dicom file represents the first aqcuisition)
            with jsonfile.open('r') as json_fid:
                data = json.load(json_fid)
            if 'AcquisitionTime' not in data or not data['AcquisitionTime']:
                data['AcquisitionTime'] = bids.get_dicomfield('AcquisitionTime', dicomfile)
            try:
                acq_time = dateutil.parser.parse(data['AcquisitionTime'])
            except (ValueError, OverflowError, TypeError):      # An empty or malformed time should not abort the conversion of the session
                LOGGER.warning(f"Could not parse the acquisition time from: '{data['AcquisitionTime']}' in {dicomfile}")
                acq_time = dateutil.parser.parse('00:00:00')

            # Find the corresponding nifti file (there should be only one, let's not make assumptions about the .gz extension)
            niifiles_ = list(jsonfile.parent.glob(jsonfile.stem + '.nii*'))
            if not niifiles_:
                LOGGER.warning(f"No nifti-file found for: {jsonfile}")
                continue
            scanpath = niifiles_[0].relative_to(bidsses)
            scans_table.loc[scanpath.as_posix(), 'acq_time'] = '1900-01-01T' + acq_time.strftime('%H:%M:%S')

    # Write the scans_table to disk
    LOGGER.info(f"Writing acquisition time data to: {scans_tsv}")
    scans_table.sort_values(by=['acq_time', 'filename'], inplace=True)
    scans_table.to_csv(scans_tsv, sep='\t', encoding='utf-8')

    # Search for the IntendedFor images and add them to the json-files. This has been postponed untill all modalities have been processed (i.e. so that all target images are indeed on disk)
    if bidsmap['DICOM']['fmap'] is not None:
        for fieldmap in bidsmap['DICOM']['fmap']:
            bidsname    = bids.get_bidsname(subid, sesid, 'fmap', fieldmap)
            niifiles    = []
            intendedfor = fieldmap['bids']['IntendedFor']

            # Search for the imaging files that match the IntendedFor search criteria
            if intendedfor:
                if isinstance(intendedfor, str):            # NB: Test the type first, a list has no startswith() method
                    if intendedfor.startswith('<<') and intendedfor.endswith('>>'):
                        intendedfor = intendedfor[2:-2].split('><')
                    else:
                        intendedfor = [intendedfor]
                elif not isinstance(intendedfor, list):
                    intendedfor = [intendedfor]
                for selector in intendedfor:
                    niifiles.extend([Path(niifile).relative_to(bidsfolder / subid)
                                     for niifile in sorted(bidsses.rglob(f"*{selector}*.nii*")) if selector])    # Search in all runs using a relative path to the subject folder
            else:
                intendedfor = []

            # Collect the fieldmap json-files (account for multiple runs and dcm2niix suffixes inserted into the acquisition label)
            acqlabel   = bids.get_bidsvalue(bidsname, 'acq')
            runpattern = bidsname.replace('_run-1_', '_run-[0-9]*_')
            fmapjsons  = set((bidsses / 'fmap').glob(runpattern + '.json'))
            if acqlabel:                                    # NB: str.replace('', ..) would insert the wildcard between all characters
                fmapjsons.update((bidsses / 'fmap').glob(runpattern.replace(acqlabel, acqlabel + '[CE][0-9]*') + '.json'))

            # Save the IntendedFor data in the json-files
            for jsonfile in sorted(fmapjsons):

                if niifiles:
                    LOGGER.info(f"Adding IntendedFor to: {jsonfile}")
                elif intendedfor:
                    LOGGER.warning(f"Empty 'IntendedFor' fieldmap value in {jsonfile}: the search for {intendedfor} gave no results")
                else:
                    LOGGER.warning(f"Empty 'IntendedFor' fieldmap value in {jsonfile}: the IntendedFor value of the bidsmap entry was empty")

                with jsonfile.open('r') as json_fid:
                    data = json.load(json_fid)
                data['IntendedFor'] = [niifile.as_posix() for niifile in niifiles]     # The path needs to use forward slashes instead of backward slashes
                with jsonfile.open('w') as json_fid:
                    json.dump(data, json_fid, indent=4)

                # Catch magnitude2 and phase2 files produced by dcm2niix (i.e. magnitude1 & magnitude2 both in the same runfolder)
                if jsonfile.name.endswith('magnitude1.json') or jsonfile.name.endswith('phase1.json'):
                    jsonfile2 = jsonfile.with_name(jsonfile.name.rsplit('1.json', 1)[0] + '2.json')
                    if jsonfile2.is_file():
                        with jsonfile2.open('r') as json_fid:
                            data = json.load(json_fid)
                        if 'IntendedFor' not in data:
                            if niifiles:
                                LOGGER.info(f"Adding IntendedFor to: {jsonfile2}")
                            else:
                                LOGGER.warning(f"Empty 'IntendedFor' fieldmap value in {jsonfile2}: the search for {intendedfor} gave no results")
                            data['IntendedFor'] = [niifile.as_posix() for niifile in niifiles]     # The path needs to use forward slashes instead of backward slashes
                            with jsonfile2.open('w') as json_fid:
                                json.dump(data, json_fid, indent=4)

    # Collect personal data from the DICOM header: only from the first session (-> BIDS specification)
    if 'runfolder' in locals():                             # i.e. the run-folder loop above was entered at least once
        dicomfile = bids.get_dicomfile(runfolder)
        personals['participant_id'] = subid
        if sesid:
            if 'session_id' not in personals:
                personals['session_id'] = sesid
            else:
                return
        age = bids.get_dicomfield('PatientAge', dicomfile)  # A string of characters with one of the following formats: nnnD, nnnW, nnnM, nnnY
        if age.endswith('D'):
            personals['age'] = str(int(float(age.rstrip('D')) / 365.2524))
        elif age.endswith('W'):
            personals['age'] = str(int(float(age.rstrip('W')) / 52.1775))
        elif age.endswith('M'):
            personals['age'] = str(int(float(age.rstrip('M')) / 12))
        elif age.endswith('Y'):
            personals['age'] = str(int(float(age.rstrip('Y'))))
        elif age:
            personals['age'] = age
        personals['sex']    = bids.get_dicomfield('PatientSex',    dicomfile)
        personals['size']   = bids.get_dicomfield('PatientSize',   dicomfile)
        personals['weight'] = bids.get_dicomfield('PatientWeight', dicomfile)
def coin_dicom(session, bidsmap, bidsfolder, personals):
    """
    Converts the session dicom-files into BIDS-valid nifti-files in the corresponding bidsfolder and
    extracts personals (e.g. Age, Sex) from the dicom header

    The function runs dcm2niix on every series subfolder of the session, renames the dcm2niix output
    files that carry a _c%d, _e%d or _ph suffix, patches the json sidecar files (TaskName, EchoTime1/2,
    IntendedFor), adds dummy bval/bvec files for dwi data and finally fills the personals dictionary.

    :param str session:     The full-path name of the subject/session source folder
    :param dict bidsmap:    The full mapping heuristics from the bidsmap YAML-file
    :param str bidsfolder:  The full-path name of the BIDS root-folder
    :param dict personals:  The dictionary with the personal information (it is updated in place)
    :return:                Nothing
    :rtype: NoneType
    """

    global logfile
    TE = [None, None]       # The echo times of magnitude1 and magnitude2, needed for the phasediff json-file

    # Get a valid BIDS subject identifier from the (first) dicom-header or from the session source folder
    participant_label = bidsmap['DICOM']['participant_label']
    if participant_label and participant_label.startswith('<<') and participant_label.endswith('>>'):
        subid = 'sub-' + bids.get_dicomfield(participant_label[2:-2], bids.get_dicomfile(bids.lsdirs(session)[0]))
    elif participant_label:
        subid = 'sub-' + participant_label
    else:
        subid = 'sub-' + session.rsplit(os.sep + 'sub-', 1)[1].split(os.sep + 'ses-', 1)[0]
    if subid == 'sub-':
        bids.printlog('Error: No valid subject identifier found for: ' + session, logfile)
        return

    # Get a BIDS session identifier from the (first) dicom-header or from the session source folder
    session_label = bidsmap['DICOM']['session_label']
    if session_label and session_label.startswith('<<') and session_label.endswith('>>'):
        sesid = 'ses-' + bids.get_dicomfield(session_label[2:-2], bids.get_dicomfile(bids.lsdirs(session)[0]))
    elif session_label:
        sesid = 'ses-' + session_label
    elif os.sep + 'ses-' in session:
        sesid = 'ses-' + session.rsplit(os.sep + 'ses-')[1]
    else:
        sesid = ''

    # Create the BIDS session-folder
    bidsses = os.path.join(bidsfolder, subid, sesid)    # NB: This gives a trailing '/' if ses=='', but that should be ok
    os.makedirs(bidsses, exist_ok=True)

    # Process all the dicom series subfolders
    series = None                                       # Stays None if the session has no series subfolders
    for series in bids.lsdirs(session):

        bids.printlog('Processing dicom-folder: ' + series, logfile)

        # Get the cleaned-up bids labels from a dicom-file and bidsmap
        dicomfile = bids.get_dicomfile(series)
        result = bids.get_matching_dicomseries(dicomfile, bidsmap)
        series_ = result['series']
        modality = result['modality']

        # Create the BIDS session/modality folder
        bidsmodality = os.path.join(bidsses, modality)
        os.makedirs(bidsmodality, exist_ok=True)

        # Compose the BIDS filename using the bids labels and run-index
        runindex = series_['run_index']
        dynamic_runindex = runindex.startswith('<<') and runindex.endswith('>>')
        if dynamic_runindex:
            bidsname = bids.get_bidsname(subid, sesid, modality, series_, runindex[2:-2])
            bidsname = bids.increment_runindex(bidsmodality, bidsname)
        else:
            bidsname = bids.get_bidsname(subid, sesid, modality, series_, runindex)

        # Convert the dicom-files in the series folder to nifti's in the BIDS-folder
        command = '{path}dcm2niix {args} -f "{filename}" -o "{outfolder}" "{infolder}"'.format(
            path=bidsmap['Options']['dcm2niix']['path'],
            args=bidsmap['Options']['dcm2niix']['args'],
            filename=bidsname,
            outfolder=bidsmodality,
            infolder=series)
        bids.printlog('$ ' + command, logfile)
        process = subprocess.run(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)     # TODO: investigate shell=False and capture_output=True
        bids.printlog(process.stdout.decode('utf-8'), logfile)
        if process.returncode != 0:
            errormsg = 'Error: Failed to process {} (errorcode {})'.format(series, process.returncode)
            bids.printlog(errormsg, logfile)
            continue

        # Rename all files ending with _c%d, _e%d and _ph: These are produced by dcm2niix for multi-coil data, multi-echo data and phase data, respectively
        jsonfiles = []                                  # Collect the associated json-files (for updating them later)
        for suffix in ('_c', '_e', '_ph'):
            for filename in sorted(glob.glob(os.path.join(bidsmodality, bidsname + suffix + '*'))):
                basepath, ext1 = os.path.splitext(filename)
                basepath, ext2 = os.path.splitext(basepath)     # Account for .nii.gz files
                basepath, index = basepath.rsplit(suffix, 1)

                if suffix == '_e' and bids.set_bidslabel(basepath, 'echo') and index:
                    basepath = bids.set_bidslabel(basepath, 'echo', index)

                elif suffix == '_e' and basepath.rsplit('_', 1)[1] in ['magnitude1', 'magnitude2'] and index:    # i.e. modality == 'fmap'
                    basepath = basepath[0:-1] + index

                    # Read the echo times that need to be added to the json-file (see below)
                    if os.path.splitext(filename)[1] == '.json':
                        with open(filename, 'r') as json_fid:
                            data = json.load(json_fid)
                        TE[int(index) - 1] = data['EchoTime']
                        bids.printlog('Reading EchoTime{} = {} from: {}'.format(index, data['EchoTime'], filename), logfile)

                elif suffix == '_e' and basepath.rsplit('_', 1)[1] == 'phasediff' and index:                     # i.e. modality == 'fmap'
                    pass

                elif suffix == '_ph' and basepath.rsplit('_', 1)[1] in ['phase1', 'phase2'] and index:           # i.e. modality == 'fmap' (TODO: untested)
                    basepath = basepath[0:-1] + index
                    bids.printlog('WARNING: Untested dcm2niix "_ph"-filetype: ' + basepath, logfile)

                else:
                    basepath = bids.set_bidslabel(basepath, 'dummy', suffix.upper() + index)    # --> append to acq-label, may need to be elaborated for future BIDS standards, supporting multi-coil data

                if dynamic_runindex:
                    newbidsname = bids.increment_runindex(bidsmodality, os.path.basename(basepath), ext2 + ext1)    # Update the runindex now that the acq-label has changed
                else:
                    newbidsname = os.path.basename(basepath)
                newfilename = os.path.join(bidsmodality, newbidsname + ext2 + ext1)
                bids.printlog('Found dcm2niix {} suffix, renaming\n{} ->\n{}'.format(suffix, filename, newfilename), logfile)
                os.rename(filename, newfilename)
                if ext1 == '.json':
                    jsonfiles.append(os.path.join(bidsmodality, newbidsname + '.json'))

        # Loop over and adapt all the newly produced json files (every nifti file comes with a json file)
        if not jsonfiles:
            jsonfiles = [os.path.join(bidsmodality, bidsname + '.json')]
        for jsonfile in jsonfiles:

            # Add a dummy b0 bval- and bvec-file for any file without a bval/bvec file (e.g. sbref, b0 scans)
            if modality == 'dwi':
                bvecfile = os.path.splitext(jsonfile)[0] + '.bvec'
                bvalfile = os.path.splitext(jsonfile)[0] + '.bval'
                if not os.path.isfile(bvecfile):
                    with open(bvecfile, 'w') as bvec_fid:
                        bids.printlog('Adding dummy bvec file: ' + bvecfile, logfile)
                        bvec_fid.write('0\n0\n0\n')
                if not os.path.isfile(bvalfile):
                    with open(bvalfile, 'w') as bval_fid:
                        bids.printlog('Adding dummy bval file: ' + bvalfile, logfile)
                        bval_fid.write('0\n')

            # Add the TaskName to the func json-file
            elif modality == 'func':
                with open(jsonfile, 'r') as json_fid:
                    data = json.load(json_fid)
                if 'TaskName' not in data:
                    bids.printlog('Adding TaskName to: ' + jsonfile, logfile)
                    with open(jsonfile, 'w') as json_fid:
                        data['TaskName'] = series_['task_label']
                        json.dump(data, json_fid, indent=4)

            # Add the EchoTime(s) used to create the difference image to the fmap json-file. NB: This assumes the
            # magnitude series have already been parsed (i.e. their nifti's had an _e suffix) -- This is normally
            # the case for Siemens (phase-series being saved after the magnitude series)
            elif modality == 'fmap':
                if series_['suffix'] == 'phasediff':
                    bids.printlog('Adding EchoTime1 and EchoTime2 to: ' + jsonfile, logfile)
                    with open(jsonfile, 'r') as json_fid:
                        data = json.load(json_fid)
                    data['EchoTime1'] = TE[0]
                    data['EchoTime2'] = TE[1]
                    with open(jsonfile, 'w') as json_fid:
                        json.dump(data, json_fid, indent=4)
                    # The echo times are still None if no magnitude json-file was parsed; ordering None raises a TypeError
                    if TE[0] is None or TE[1] is None:
                        bids.printlog('WARNING: EchoTime1 and/or EchoTime2 could not be determined for: ' + jsonfile, logfile)
                    elif TE[0] > TE[1]:
                        bids.printlog('WARNING: EchoTime1 > EchoTime2 in: ' + jsonfile, logfile)

    # Search for the IntendedFor images and add them to the json-files. This has been postponed until all modalities have been processed (i.e. so that all target images are indeed on disk)
    if bidsmap['DICOM']['fmap'] is not None:
        for fieldmap in bidsmap['DICOM']['fmap']:
            if 'IntendedFor' in fieldmap and fieldmap['IntendedFor']:
                jsonfile = os.path.join(bidsses, 'fmap', bids.get_bidsname(subid, sesid, 'fmap', fieldmap, '1') + '.json')  # TODO: Assumes that there is only 1 fieldmap acquired for each bidsmap entry / series
                if not os.path.isfile(jsonfile):
                    continue
                intendedfor = fieldmap['IntendedFor']
                if intendedfor.startswith('<<') and intendedfor.endswith('>>'):
                    intendedfor = intendedfor[2:-2].split('><')
                else:
                    intendedfor = [intendedfor]

                with open(jsonfile, 'r') as json_fid:
                    data = json.load(json_fid)

                # Use a path relative to the subject folder, written with forward slashes on every platform
                niipattern = os.path.join(bidsses, '**' + os.sep + '*' + '*'.join(intendedfor) + '*.nii*')
                niifiles = [niifile.split(os.sep + subid + os.sep, 1)[1].replace(os.sep, '/')
                            for niifile in sorted(glob.glob(niipattern))]
                data['IntendedFor'] = niifiles
                bids.printlog('Adding IntendedFor to: ' + jsonfile, logfile)
                with open(jsonfile, 'w') as json_fid:
                    json.dump(data, json_fid, indent=4)

                # Catch magnitude2 files produced by dcm2niix
                if jsonfile.endswith('magnitude1.json'):
                    jsonfile2 = jsonfile.rsplit('1.json', 1)[0] + '2.json'
                    if os.path.isfile(jsonfile2):
                        with open(jsonfile2, 'r') as json_fid:
                            data = json.load(json_fid)
                        data['IntendedFor'] = niifiles
                        bids.printlog('Adding IntendedFor to: ' + jsonfile2, logfile)
                        with open(jsonfile2, 'w') as json_fid:
                            json.dump(data, json_fid, indent=4)

    # Collect personal data from the DICOM header (of the last processed series folder)
    if series is None:
        return                                          # No series folders were found, hence there is no header to read
    dicomfile = bids.get_dicomfile(series)
    personals['participant_id'] = subid
    if sesid:
        personals['session_id'] = sesid                 # TODO: Check if this can be in the participants.tsv file according to BIDS
    personals['age'] = bids.get_dicomfield('PatientAge', dicomfile)
    personals['sex'] = bids.get_dicomfield('PatientSex', dicomfile)
    personals['size'] = bids.get_dicomfield('PatientSize', dicomfile)
    personals['weight'] = bids.get_dicomfield('PatientWeight', dicomfile)
def rawmapper(rawfolder, outfolder: Path = Path(), sessions: list = [], rename: bool = False,
              dicomfield: tuple = ('PatientComments', ), wildcard: str = '*', subprefix: str = 'sub-',
              sesprefix: str = 'ses-', dryrun: bool = False) -> None:
    """
    Maps the values of one or more dicomfields onto the sub/ses source folders. The mapping is either
    saved in a tab-separated mapper-file or (if rename is set) used to rename the sub/ses folders

    :param rawfolder:   The root folder-name of the sub/ses/data/file tree containing the source data files
    :param outfolder:   The name of the folder where the mapping-file is saved. A falsy value (e.g. None or '') selects the rawfolder
    :param sessions:    Space separated list of selected sub-#/ses-# names / folders to be processed. Otherwise all sessions in the bidsfolder will be selected
    :param rename:      Flag for renaming the sub-subid folders to sub-dicomfield
    :param dicomfield:  The names of the dicomfields that are mapped (/ renamed to sub-dcmval/ses-dcmval)
    :param wildcard:    The Unix style pathname pattern expansion that is used by glob to select the series from which the dicomfield is being mapped
    :param subprefix:   The prefix common for all source subject-folders
    :param sesprefix:   The prefix common for all source session-folders
    :param dryrun:      Flag for dry-running renaming the sub-subid folders
    :return:            Nothing
    """

    # Input checking
    # NOTE(review): a Path object is always truthy, so the default Path() does NOT fall back to rawfolder
    # (it resolves to the current working directory) -- confirm whether that is intended
    rawfolder = Path(rawfolder)
    if not outfolder:
        outfolder = rawfolder
        print(f"Outfolder: {outfolder}")
    outfolder = Path(outfolder)

    # Create or append the output to a mapper logfile
    mapperfile = outfolder / f"rawmapper_{'_'.join(dicomfield)}.tsv"
    if not dryrun:
        if rename:
            with mapperfile.open('a') as fid:
                fid.write('subid\tsesid\tnewsubid\tnewsesid\n')
        else:
            with mapperfile.open('x') as fid:           # Mode 'x' raises a FileExistsError if the mapper-file already exists
                fid.write('subid\tsesid\tseriesname\t{}\n'.format('\t'.join(dicomfield)))

    # Map the sessions in the sourcefolder (the sessions argument is only re-bound, never mutated)
    if not sessions:
        sessions = list(rawfolder.glob(f"{subprefix}*/{sesprefix}*"))
        if not sessions:
            sessions = rawfolder.glob(f"{subprefix}*")  # Try without session-subfolders
    else:
        sessions = [sessionitem for session in sessions for sessionitem in rawfolder.rglob(session)]

    # Loop over the selected sessions in the sourcefolder
    for session in sessions:

        # Get the subject and session identifiers from the raw folder
        subid, sesid = bids.get_subid_sesid(session)

        # Parse the new subject and session identifiers from the dicomfield
        series = bids.lsdirs(session, wildcard)
        if not series:
            seriesname = ''                             # No series folder was found, so there is no name / value to report
            dcmval = ''
        else:
            series = series[0]                          # TODO: loop over series?
            seriesname = series.name
            dicomfile = bids.get_dicomfile(series)
            dcmval = '/'.join(str(bids.get_dicomfield(dcmfield, dicomfile)) for dcmfield in dicomfield)

        # Rename the session subfolder in the sourcefolder and print & save this info
        if rename:

            # Get the new subid and sesid
            if not dcmval or dcmval == 'None':
                warnings.warn(f"Skipping renaming because the dicom-field was empty for: {session}")
                continue

            if '/' in dcmval:                           # Allow for different sub/ses delimiters that could be entered at the console (i.e. in PatientComments)
                delim = '/'
            elif '\\' in dcmval:
                delim = '\\'
            else:
                delim = '\r\n'
            newsubsesid = [val for val in dcmval.split(delim) if val]   # Skip empty lines / entries
            # The prefixes are escaped because they are user input that is used as a regular expression here
            newsubid = subprefix + bids.cleanup_value(re.sub(f'^{re.escape(subprefix)}', '', newsubsesid[0]))
            if newsubid == subprefix or newsubid == subprefix + 'None':
                newsubid = subid
                warnings.warn('Could not rename {} because the dicom-field was empty for: {}'.format(subid, session))
            if len(newsubsesid) == 1:
                newsesid = sesid
            elif len(newsubsesid) == 2:
                newsesid = sesprefix + bids.cleanup_value(re.sub(f'^{re.escape(sesprefix)}', '', newsubsesid[1]))
            else:
                warnings.warn(f"Skipping renaming of {session} because the dicom-field '{dcmval}' could not be parsed into [subid, sesid]")
                continue
            if newsesid == sesprefix or newsesid == sesprefix + 'None':
                newsesid = sesid
                warnings.warn(f"Could not rename {sesid} because the dicom-field was empty for: {session}")

            # Save the dicomfield / sub-ses mapping to disk and rename the session subfolder (but skip if it already exists)
            newsession = rawfolder / newsubid / newsesid
            print(f"{session} -> {newsession}")
            if newsession == session:
                continue
            if newsession.is_dir():
                warnings.warn(f"{newsession} already exists, skipping renaming of {session}")
            elif not dryrun:
                with mapperfile.open('a') as fid:
                    fid.write(f"{subid}\t{sesid}\t{newsubid}\t{newsesid}\n")
                session.rename(newsession)

        # Print & save the dicom values
        else:
            dcmvalues = '\t'.join(dcmval.split('/'))
            print('{}/{}/{}\t-> {}'.format(subid, sesid, seriesname, dcmvalues))
            if not dryrun:
                with mapperfile.open('a') as fid:
                    fid.write('{}\t{}\t{}\t{}\n'.format(subid, sesid, seriesname, dcmvalues))
def sortsession(sessionfolder: str, dicomfiles: list, rename: bool, ext: str, nosort: bool) -> None:
    """
    Moves the dicomfiles into (3-digit) SeriesNumber-SeriesDescription subfolders of the sessionfolder
    (e.g. '003-T1MPRAGE') and / or renames them

    :param sessionfolder:   The name of the destination folder of the dicom files (it is created if needed)
    :param dicomfiles:      The list of dicomfiles to be sorted and/or renamed
    :param rename:          Boolean to rename the DICOM files to a PatientName_SeriesNumber_SeriesDescription_AcquisitionNumber_InstanceNumber scheme
    :param ext:             The file extension after sorting (empty value keeps original file extension)
    :param nosort:          Boolean to skip sorting of DICOM files into SeriesNumber-SeriesDescription directories (useful in combination with -r for renaming only)
    :return:                Nothing
    """

    print(f'>> Processing: {sessionfolder} ({len(dicomfiles)} files)')
    if not os.path.isdir(sessionfolder):
        os.makedirs(sessionfolder)

    handled_seriesdirs = []                 # The series subfolders that were already encountered in this call
    for dicomfile in dicomfiles:

        # Read the series identifiers from the DICOM header
        seriesnr = bids.get_dicomfield('SeriesNumber', dicomfile)
        seriesdescr = bids.get_dicomfield('SeriesDescription', dicomfile)
        if not seriesnr:
            warnings.warn(f'No SeriesNumber found, skipping: {dicomfile}')          # This is not a normal DICOM file, better not do anything with it
            continue
        if not seriesdescr:
            seriesdescr = bids.get_dicomfield('ProtocolName', dicomfile)
        if not seriesdescr:
            seriesdescr = 'unknown_protocol'
            warnings.warn(f'No SeriesDecription or ProtocolName found for: {dicomfile}')

        # Determine the (new) name of the file; the original name is kept unless it can be renamed safely
        filename = os.path.basename(dicomfile)
        if rename:
            acquisitionnr = bids.get_dicomfield('AcquisitionNumber', dicomfile)
            instancenr = bids.get_dicomfield('InstanceNumber', dicomfile) or bids.get_dicomfield('ImageNumber', dicomfile)     # This Attribute was named Image Number in earlier versions of this Standard
            patientname = bids.get_dicomfield('PatientName', dicomfile) or bids.get_dicomfield('PatientsName', dicomfile)      # This Attribute was/is sometimes called PatientsName?
            if patientname and seriesnr and seriesdescr and acquisitionnr and instancenr:
                filename = cleanup(f'{patientname}_{seriesnr:03d}_{seriesdescr}_{acquisitionnr:05d}_{instancenr:05d}{ext}')
            else:
                warnings.warn(f'Missing one or more crucial DICOM-fields, cannot safely rename {dicomfile}\npatientname = {patientname}\nseriesnumber = {seriesnr}\nseriesdescription = {seriesdescr}\nacquisitionnr = {acquisitionnr}\ninstancenr = {instancenr}')

        # Determine (and if needed create) the destination folder
        if nosort:
            pathname = sessionfolder
        else:
            seriesdir = cleanup(f'{seriesnr:03d}-{seriesdescr}')
            pathname = os.path.join(sessionfolder, seriesdir)
            if seriesdir not in handled_seriesdirs:                                 # We have a new series
                if not os.path.isdir(pathname):
                    print(' Creating: ' + pathname)
                    os.makedirs(pathname)
                handled_seriesdirs.append(seriesdir)

        # Move the file, but never on top of an existing file
        newfilename = os.path.join(pathname, os.path.splitext(filename)[0] + ext if ext else filename)
        if os.path.isfile(newfilename):
            warnings.warn(f'File already exists, cannot safely rename {dicomfile} -> {newfilename}')
        else:
            os.rename(dicomfile, newfilename)
def scanparticipant(dataformat: str, session: Path, personals: dict, subid: str, sesid: str) -> bool:
    """
    Looks up the first source file of the session and, for DICOM data, copies the personal data
    (age, sex, size, weight) from its header into the personals dictionary

    :param dataformat:  The format of the source data; 'DICOM' and 'PAR' are handled, anything else is logged as an error
    :param session:     The full-path name of the subject/session source file/folder
    :param personals:   The dictionary with the personal information (it is updated in place)
    :param subid:       The subject-id from the bids-folder
    :param sesid:       The session-id from the bids-folder
    :return:            False if the data format is unsupported, if no source data is found or if personals
                        already has a session_id (while sesid is set). True otherwise
    """

    if dataformat not in ('DICOM', 'PAR'):
        LOGGER.error(f"Unsupported data format: {dataformat}\nPlease report this bug")
        return False

    # Find the (first) DICOM- or PAR/XML source file
    sourcefile = Path()
    if dataformat == 'DICOM':
        sources = bids.lsdirs(session)
        for source in sources:
            sourcefile = bids.get_dicomfile(source)
            if sourcefile.name:
                break
    else:
        sources = bids.get_parfiles(session)
        if sources:
            sourcefile = sources[0]

    if not sources:
        LOGGER.info(f"No data found for: {session}")
        return False

    # Only a DICOM header is read for personal data (PAR/XML does not contain personal info)
    if dataformat != 'DICOM' or not sourcefile.name:
        return True

    personals['participant_id'] = subid
    if sesid:
        if 'session_id' in personals:
            return False                            # Only from the first session -> BIDS specification
        personals['session_id'] = sesid

    # PatientAge is a string of characters with one of the following formats: nnnD, nnnW, nnnM, nnnY.
    # A value with such a unit is converted to whole years, any other non-empty value is stored as it is
    age = bids.get_dicomfield('PatientAge', sourcefile)
    for unit, units_per_year in (('D', 365.2524), ('W', 52.1775), ('M', 12), ('Y', 1)):
        if age.endswith(unit):
            personals['age'] = str(int(float(age.rstrip(unit)) / units_per_year))
            break
    else:
        if age:
            personals['age'] = age

    for key, field in (('sex', 'PatientSex'), ('size', 'PatientSize'), ('weight', 'PatientWeight')):
        personals[key] = bids.get_dicomfield(field, sourcefile)

    return True
def sortsession(sessionfolder, pattern, rename, nosort):
    """
    Sorts dicomfiles into (3-digit) SeriesNumber-SeriesDescription subfolders (e.g. '003-T1MPRAGE')

    Only regular files in the sessionfolder are processed. A file is left untouched (with a warning) if
    its SeriesNumber cannot be used for sorting or if the target file already exists, and it keeps its
    original name (with a warning) if the header fields needed for renaming are missing.

    :param str sessionfolder:   The name of the folder that contains the dicom files
    :param str pattern:         The regular expression pattern used in re.match(pattern, dicomfile) to select the dicom files
    :param bool rename:         Boolean to rename the DICOM files to a PatientName_SeriesNumber_SeriesDescription_AcquisitionNumber_InstanceNumber scheme
    :param bool nosort:         Boolean to skip sorting of DICOM files into SeriesNumber-SeriesDescription directories (useful in combination with -r for renaming only)
    :return:                    Nothing
    :rtype: NoneType
    """

    # Input checking
    sessionfolder = os.path.abspath(os.path.expanduser(sessionfolder))
    seriesdirs = []
    print('>> processing: ' + sessionfolder)

    # Map all dicomfiles and move them to series folders
    for dcmfile in os.listdir(sessionfolder):

        if not re.match(pattern, dcmfile):
            continue
        dicomfile = os.path.join(sessionfolder, dcmfile)
        if not os.path.isfile(dicomfile):               # Skip folders, e.g. the series subfolders of a previous run
            continue

        # Extract the SeriesDescription and SeriesNumber from the dicomfield
        seriesnr = bids.get_dicomfield('SeriesNumber', dicomfile)
        seriesdescr = bids.get_dicomfield('SeriesDescription', dicomfile)
        if not isinstance(seriesnr, int):               # The number is formatted with ':03d', which fails for anything but an integer
            warnings.warn('No SeriesNumber found, skipping: ' + dicomfile)
            continue
        if not seriesdescr:
            seriesdescr = bids.get_dicomfield('ProtocolName', dicomfile)
            if not seriesdescr:
                seriesdescr = 'unknown_protocol'
                warnings.warn('No SeriesDecription or ProtocolName found for: ' + dicomfile)

        # Compose the new filename; the original name is kept if the file cannot be renamed safely
        filename = os.path.basename(dicomfile)
        if rename:
            acquisitionnr = bids.get_dicomfield('AcquisitionNumber', dicomfile)
            instancenr = bids.get_dicomfield('InstanceNumber', dicomfile)
            if not instancenr:
                instancenr = bids.get_dicomfield('ImageNumber', dicomfile)      # This Attribute was named Image Number in earlier versions of this Standard
            patientname = bids.get_dicomfield('PatientName', dicomfile)
            if not patientname:
                patientname = bids.get_dicomfield('PatientsName', dicomfile)    # This Attribute was/is sometimes called PatientsName?
            ext = os.path.splitext(dicomfile)[1]
            if patientname and isinstance(acquisitionnr, int) and isinstance(instancenr, int):
                filename = '{patientname}_{seriesnr:03d}_{seriesdescr}_{acquisitionnr:05d}_{instancenr:05d}{ext}'.format(
                    patientname=patientname,
                    seriesnr=seriesnr,
                    seriesdescr=seriesdescr,
                    acquisitionnr=acquisitionnr,
                    instancenr=instancenr,
                    ext=ext)
            else:
                warnings.warn('Missing one or more crucial DICOM-fields, cannot safely rename ' + dicomfile)

        # Move and/or rename the dicomfile in(to) the (series sub)folder
        if nosort:
            pathname = sessionfolder
        else:
            # Create the series subfolder
            seriesdir = '{:03d}-{}'.format(seriesnr, seriesdescr)
            pathname = os.path.join(sessionfolder, seriesdir)
            if seriesdir not in seriesdirs:             # We have a new series
                if not os.path.isdir(pathname):
                    print(' Creating: ' + pathname)
                    os.makedirs(pathname)
                seriesdirs.append(seriesdir)

        newfilename = os.path.join(pathname, filename)
        if newfilename == dicomfile:                    # Nothing to move or rename
            continue
        if os.path.exists(newfilename):                 # os.rename would silently replace an existing file on Unix
            warnings.warn('File already exists, cannot safely rename {} -> {}'.format(dicomfile, newfilename))
        else:
            os.rename(dicomfile, newfilename)