Example 1
def sortsessions(rawfolder,
                 subjectid='',
                 sessionid='',
                 pattern=r'.*\.(IMA|dcm)$',
                 rename=False,
                 nosort=False):
    """

    :param rawfolder:   The root folder containing the source [sub/][ses/]dicomfiles
    :param subjectid:   The prefix of the sub folders in rawfolder
    :param sessionid:   The prefix of the ses folders in sub folder
    :param pattern:     The regular expression pattern used in re.match() to select the dicom files
    :param bool rename: Boolean to rename the DICOM files to a PatientName_SeriesNumber_SeriesDescription_AcquisitionNumber_InstanceNumber scheme
    :param bool nosort: Boolean to skip sorting of DICOM files into SeriesNumber-SeriesDescription directories (useful in combination with -r for renaming only)
    :return:            Nothing
    :rtype: NoneType
    """

    if subjectid:
        for subfolder in bids.lsdirs(rawfolder, subjectid + '*'):
            if sessionid:
                for sesfolder in bids.lsdirs(subfolder, sessionid + '*'):
                    sortsession(sesfolder, pattern, rename, nosort)
            else:
                sortsession(subfolder, pattern, rename, nosort)
    else:
        sortsession(rawfolder, pattern, rename, nosort)
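A minimal usage sketch for this variant; the path is hypothetical, and the bids helper module and the sortsession() worker are assumed to be importable from the surrounding package:

# Hypothetical call: sort (and rename) the raw/sub-*/ses-*/ DICOM files in place
sortsessions('/project/raw', subjectid='sub-', sessionid='ses-', rename=True)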
Example 2
def sortsessions(session: str, subjectid: str='', sessionid: str='', rename: bool=False,
                 ext: str='', nosort: bool=False, pattern: str=r'.*\.(IMA|dcm)$') -> None:
    """

    :param session:     The root folder containing the source [sub/][ses/]dicomfiles or the DICOMDIR file
    :param subjectid:   The prefix of the sub folders in session
    :param sessionid:   The prefix of the ses folders in sub folder
    :param rename:      Boolean to rename the DICOM files to a PatientName_SeriesNumber_SeriesDescription_AcquisitionNumber_InstanceNumber scheme
    :param ext:         The file extension after sorting (empty value keeps original file extension)
    :param nosort:      Boolean to skip sorting of DICOM files into SeriesNumber-SeriesDescription directories (useful in combination with -r for renaming only)
    :param pattern:     The regular expression pattern used in re.match() to select the dicom files
    :return:            Nothing
    """

    # Input checking
    session = os.path.abspath(os.path.expanduser(session))

    # Define the sessionfolder, collect all DICOM files and run sortsession()
    if subjectid:   # Do a recursive search, assuming session is a foldername, not a DICOMDIR file

        for subfolder in bids.lsdirs(session, subjectid + '*'):
            if sessionid:
                sessionfolders = bids.lsdirs(subfolder, sessionid + '*')
            else:
                sessionfolders = [subfolder]

            for sessionfolder in sessionfolders:
                dicomfiles = [os.path.join(sessionfolder, dcmfile) for dcmfile in os.listdir(sessionfolder) if re.match(pattern, dcmfile)]
                sortsession(sessionfolder, dicomfiles, rename, ext, nosort)

    else:

        if os.path.basename(session) == 'DICOMDIR':

            from pydicom.filereader import read_dicomdir

            dicomdir = read_dicomdir(session)

            sessionfolder  = os.path.dirname(session)
            sessionfolder_ = sessionfolder
            for patient in dicomdir.patient_records:
                if len(dicomdir.patient_records) > 1:
                    sessionfolder = os.path.join(sessionfolder_, f'sub-{cleanup(str(patient.PatientName))}')

                for n, study in enumerate(patient.children, 1):                                                             # TODO: Check order
                    if len(patient.children) > 1:
                        sessionfolder = os.path.join(sessionfolder_, f'ses-{n:02}{cleanup(str(study.StudyDescription))}')   # TODO: Leave out StudyDescription?
                        print(f'WARNING: the session index-number "{n:02}" is not necessarily meaningful: {sessionfolder}')

                    dicomfiles = []
                    for series in study.children:
                        dicomfiles.extend([os.path.join(sessionfolder_, *image.ReferencedFileID) for image in series.children])
                    sortsession(sessionfolder, dicomfiles, rename, ext, nosort)

        else:

            sessionfolder = session
            dicomfiles    = [os.path.join(sessionfolder, dcmfile) for dcmfile in os.listdir(sessionfolder) if re.match(pattern, dcmfile)]
            sortsession(sessionfolder, dicomfiles, rename, ext, nosort)
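This variant also accepts a DICOMDIR index file; a hedged sketch under the same assumptions (the paths are made up):

# Hypothetical call: sort a folder tree of raw sessions, giving all files a .dcm extension
sortsessions('/project/raw', subjectid='sub-', sessionid='ses-', ext='.dcm')

# Hypothetical call: rename the files listed in a DICOMDIR without sorting them into series folders
sortsessions('/media/cdrom/DICOMDIR', rename=True, nosort=True)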
Example 3
def deface(bidsdir: str, pattern: str, subjects: list, output: str,
           cluster: bool, nativespec: str, kwargs: dict):
    """
    Defaces anatomical images in the BIDS folder with pydeface, either directly or as DRMAA jobs on a compute cluster
    """

    # Input checking
    bidsdir = Path(bidsdir)

    # Start logging
    bids.setup_logging(bidsdir / 'code' / 'bidscoin' / 'deface.log')
    LOGGER.info('')
    LOGGER.info('------------ START deface ------------')
    LOGGER.info(
        f">>> deface bidsfolder={bidsdir} pattern={pattern} subjects={subjects} output={output}"
        f" cluster={cluster} nativespec={nativespec} {kwargs}")

    # Get the list of subjects
    if not subjects:
        subjects = bids.lsdirs(bidsdir, 'sub-*')
        if not subjects:
            LOGGER.warning(f"No subjects found in: {bidsdir/'sub-*'}")
    else:
        subjects = ['sub-' + (subject[len('sub-'):] if subject.startswith('sub-') else subject)
                    for subject in subjects]        # Make sure there is a "sub-" prefix (NB: str.replace('^sub-','') takes a literal string, not a regex)
        subjects = [bidsdir/subject for subject in subjects if (bidsdir/subject).is_dir()]

    # Prepare the HPC job submission
    with drmaa.Session() as pbatch:
        if cluster:
            jt = pbatch.createJobTemplate()
            jt.jobEnvironment = os.environ
            jt.remoteCommand = shutil.which('pydeface')
            jt.nativeSpecification = nativespec
            jt.joinFiles = True

        # Loop over bids subject/session-directories
        for n, subject in enumerate(subjects, 1):

            sessions = bids.lsdirs(subject, 'ses-*')
            if not sessions:
                sessions = [subject]
            for session in sessions:

                LOGGER.info('--------------------------------------')
                LOGGER.info(f"Processing ({n}/{len(subjects)}): {session}")

                sub_id, ses_id = bids.get_subid_sesid(session / 'dum.my')   # Parse the sub-/ses-labels from the path, using a dummy filename

                # Search for images that need to be defaced
                for match in sorted(match for match in session.glob(pattern) if '.nii' in match.suffixes):

                    # Construct the output filename and relative path name (used in BIDS)
                    match_rel = str(match.relative_to(session))
                    if not output:
                        outputfile = match
                        outputfile_rel = match_rel
                    elif output == 'derivatives':
                        outputfile = bidsdir / 'derivatives' / 'deface' / sub_id / ses_id / match.parent.name / match.name
                        outputfile_rel = str(outputfile.relative_to(bidsdir))
                    else:
                        outputfile = session / output / match.name
                        outputfile_rel = str(outputfile.relative_to(session))
                    outputfile.parent.mkdir(parents=True, exist_ok=True)

                    # Deface the image
                    LOGGER.info(f"Defacing: {match_rel} -> {outputfile_rel}")
                    if cluster:
                        jt.args    = [str(match), '--outfile', str(outputfile), '--force'] + \
                                     [item for pair in [[f"--{key}", val] for key, val in kwargs.items()] for item in pair]
                        jt.jobName = f"pydeface_{sub_id}_{ses_id}"
                        jobid      = pbatch.runJob(jt)
                        LOGGER.info(f"Your deface job has been submitted with ID: {jobid}")
                    else:
                        pdu.deface_image(str(match), str(outputfile), force=True, forcecleanup=True, **kwargs)

                    # Add a json sidecar-file
                    outputjson = outputfile.with_suffix('').with_suffix('.json')    # Account for .nii.gz outputs
                    LOGGER.info(f"Adding a json sidecar-file: {outputjson}")
                    shutil.copyfile(match.with_suffix('').with_suffix('.json'), outputjson)

                    # Update the IntendedFor fields in the fieldmap sidecar files
                    if output and output != 'derivatives' and (session/'fmap').is_dir():
                        for fmap in (session/'fmap').glob('*.json'):
                            with fmap.open('r') as fmap_fid:
                                fmap_data = json.load(fmap_fid)
                            intendedfor = fmap_data.get('IntendedFor', [])      # Guard against fieldmaps without an IntendedFor field
                            if isinstance(intendedfor, str):
                                intendedfor = [intendedfor]
                            if match_rel in intendedfor:
                                LOGGER.info(f"Updating 'IntendedFor' to {outputfile_rel} in {fmap}")
                                fmap_data['IntendedFor'] = intendedfor + [outputfile_rel]
                                with fmap.open('w') as fmap_fid:
                                    json.dump(fmap_data, fmap_fid, indent=4)

                    # Update the scans.tsv file
                    scans_tsv = session/f"{sub_id}{bids.add_prefix('_', ses_id)}_scans.tsv"
                    if output and output != 'derivatives' and scans_tsv.is_file():
                        LOGGER.info(f"Adding {outputfile_rel} to {scans_tsv}")
                        scans_table = pd.read_csv(scans_tsv, sep='\t', index_col='filename')
                        scans_table.loc[outputfile_rel] = scans_table.loc[match_rel]
                        scans_table.sort_values(by=['acq_time', 'filename'], inplace=True)
                        scans_table.to_csv(scans_tsv, sep='\t', encoding='utf-8')

        if cluster:
            LOGGER.info('Waiting for the deface jobs to finish...')
            pbatch.synchronize(jobIds=[pbatch.JOB_IDS_SESSION_ALL], timeout=pbatch.TIMEOUT_WAIT_FOREVER, dispose=True)
            pbatch.deleteJobTemplate(jt)

    LOGGER.info('-------------- FINISHED! -------------')
    LOGGER.info('')
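A hedged sketch of how deface() might be invoked; the BIDS folder, the search pattern and the pydeface keyword arguments are illustrative only:

# Hypothetical call: deface all T1w images locally and save the results in the BIDS derivatives folder
deface(bidsdir='/project/bids', pattern='anat/*_T1w.nii*', subjects=[],
       output='derivatives', cluster=False, nativespec='', kwargs={})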
Example 4
def bidscoiner(rawfolder: str,
               bidsfolder: str,
               subjects: tuple = (),
               force: bool = False,
               participants: bool = False,
               bidsmapfile: str = 'code' + os.sep + 'bidsmap.yaml',
               subprefix: str = 'sub-',
               sesprefix: str = 'ses-') -> None:
    """
    Main function that processes all the subjects and sessions in the rawfolder and uses the
    bidsmap.yaml file in bidsfolder/code to cast the data into the BIDS folder.

    :param rawfolder:       The root folder-name of the sub/ses/data/file tree containing the source data files
    :param bidsfolder:      The name of the BIDS root folder
    :param subjects:        List of selected subjects / participants (i.e. sub-# names / folders) to be processed (the sub- prefix can be removed). Otherwise all subjects in the rawfolder will be selected
    :param force:           If True, subjects will be processed, regardless of existing folders in the bidsfolder. Otherwise existing folders will be skipped
    :param participants:    If True, subjects in participants.tsv will not be processed (this could be used e.g. to protect these subjects from being reprocessed), also when force=True
    :param bidsmapfile:     The name of the bidsmap YAML-file. If the bidsmap pathname is relative (i.e. no "/" in the name) then it is assumed to be located in bidsfolder/code/
    :param subprefix:       The prefix common for all source subject-folders
    :param sesprefix:       The prefix common for all source session-folders
    :return:                Nothing
    """

    # Input checking & defaults
    rawfolder = os.path.abspath(os.path.expanduser(rawfolder))
    bidsfolder = os.path.abspath(os.path.expanduser(bidsfolder))
    logfile = os.path.join(bidsfolder, 'code', 'bidscoiner.log')

    setup_logging(logfile)

    # Create a code subfolder
    os.makedirs(os.path.join(bidsfolder, 'code'), exist_ok=True)
    if not os.path.isfile(os.path.join(bidsfolder, '.bidsignore')):
        with open(os.path.join(bidsfolder, '.bidsignore'), 'w') as bidsignore:
            bidsignore.write(bids.unknownmodality + os.sep)

    # Start logging
    logger.info(
        f'------------ START BIDScoiner {bids.version()}: BIDS {bids.bidsversion()} ------------'
    )
    logger.info(
        f'>>> bidscoiner rawfolder={rawfolder} bidsfolder={bidsfolder} subjects={subjects} force={force}'
        f' participants={participants} bidsmap={bidsmapfile} subprefix={subprefix} sesprefix={sesprefix}'
    )

    # Create a dataset description file if it does not exist
    dataset_file = os.path.join(bidsfolder, 'dataset_description.json')
    if not os.path.isfile(dataset_file):
        dataset_description = {
            "Name":                 "REQUIRED. Name of the dataset",
            "BIDSVersion":          bids.bidsversion(),
            "License":              "RECOMMENDED. What license is this dataset distributed under? The use of license name abbreviations is suggested for specifying a license",
            "Authors":              ["OPTIONAL. List of individuals who contributed to the creation/curation of the dataset"],
            "Acknowledgements":     "OPTIONAL. List of individuals who contributed to the creation/curation of the dataset",
            "HowToAcknowledge":     "OPTIONAL. Instructions how researchers using this dataset should acknowledge the original authors. This field can also be used to define a publication that should be cited in publications that use the dataset",
            "Funding":              ["OPTIONAL. List of sources of funding (grant numbers)"],
            "ReferencesAndLinks":   ["OPTIONAL. List of references to publication that contain information on the dataset, or links"],
            "DatasetDOI":           "OPTIONAL. The Document Object Identifier of the dataset (not the corresponding paper)"
        }
        logger.info('Creating dataset description file: ' + dataset_file)
        with open(dataset_file, 'w') as fid:
            json.dump(dataset_description, fid, indent=4)

    # Create a README file if it does not exist
    readme_file = os.path.join(bidsfolder, 'README')
    if not os.path.isfile(readme_file):
        logger.info('Creating README file: ' + readme_file)
        with open(readme_file, 'w') as fid:
            fid.write('A free form text ( README ) describing the dataset in more detail that SHOULD be provided')

    # Get the bidsmap heuristics from the bidsmap YAML-file
    bidsmap = bids.get_heuristics(bidsmapfile, os.path.join(bidsfolder, 'code'), logger)

    # Get the table & dictionary of the subjects that have been processed
    participants_tsv = os.path.join(bidsfolder, 'participants.tsv')
    participants_json = os.path.splitext(participants_tsv)[0] + '.json'
    if os.path.exists(participants_tsv):
        participants_table = pd.read_csv(participants_tsv, sep='\t')
        participants_table.set_index(['participant_id'], verify_integrity=True, inplace=True)
    else:
        participants_table = pd.DataFrame()
        participants_table.index.name = 'participant_id'
    if os.path.exists(participants_json):
        with open(participants_json, 'r') as json_fid:
            participants_dict = json.load(json_fid)
    else:
        participants_dict = dict()

    # Get the list of subjects
    if not subjects:
        subjects = bids.lsdirs(rawfolder, subprefix + '*')
    else:
        subjects = [subprefix + (subject[len(subprefix):] if subject.startswith(subprefix) else subject)
                    for subject in subjects]        # Make sure there is a "sub-" prefix (NB: str.lstrip() strips characters, not a prefix)
        subjects = [os.path.join(rawfolder, subject) for subject in subjects
                    if os.path.isdir(os.path.join(rawfolder, subject))]

    # Loop over all subjects and sessions and convert them using the bidsmap entries
    for n, subject in enumerate(subjects, 1):

        if participants and subject in list(participants_table.index):
            logger.info(f'{"-" * 30}')
            logger.info(f'Skipping subject: {subject} ({n}/{len(subjects)})')
            continue

        logger.info(f'{"-"*30}')
        logger.info(f'Coining subject: {subject} ({n}/{len(subjects)})')

        personals = dict()
        sessions = bids.lsdirs(subject, sesprefix + '*')
        if not sessions:
            sessions = [subject]
        for session in sessions:

            # Check if we should skip the session-folder
            if not force and os.path.isdir(session.replace(rawfolder, bidsfolder)):
                continue

            # Update / append the dicom mapping
            if bidsmap['DICOM']:
                coin_dicom(session, bidsmap, bidsfolder, personals, subprefix,
                           sesprefix)

            # Update / append the PAR/REC mapping
            if bidsmap['PAR']:
                coin_par(session, bidsmap, bidsfolder, personals)

            # Update / append the P7 mapping
            if bidsmap['P7']:
                coin_p7(session, bidsmap, bidsfolder, personals)

            # Update / append the nifti mapping
            if bidsmap['Nifti']:
                coin_nifti(session, bidsmap, bidsfolder, personals)

            # Update / append the file-system mapping
            if bidsmap['FileSystem']:
                coin_filesystem(session, bidsmap, bidsfolder, personals)

            # Update / append the plugin mapping
            if bidsmap['PlugIn']:
                coin_plugin(session, bidsmap, bidsfolder, personals)

        # Store the collected personals in the participant_table
        for key in personals:

            # participant_id is the index of the participants_table
            assert 'participant_id' in personals
            if key == 'participant_id':
                continue

            # TODO: Check that only values that are consistent over sessions go in the participants.tsv file, otherwise put them in a sessions.tsv file

            if key not in participants_dict:
                participants_dict[key] = dict(
                    LongName    = 'Long (unabbreviated) name of the column',
                    Description = 'Description of the column',
                    Levels      = dict(Key='Value (This is for categorical variables: a dictionary of possible values (keys) and their descriptions (values))'),
                    Units       = 'Measurement units. [<prefix symbol>]<unit symbol> format following the SI standard is RECOMMENDED',
                    TermURL     = 'URL pointing to a formal definition of this type of data in an ontology available on the web')
            participants_table.loc[personals['participant_id'], key] = personals[key]

    # Write the collected data to the participant files
    logger.info('Writing subject data to: ' + participants_tsv)
    participants_table.to_csv(participants_tsv, sep='\t', encoding='utf-8', na_rep='n/a')

    logger.info('Writing subject data dictionary to: ' + participants_json)
    with open(participants_json, 'w') as json_fid:
        json.dump(participants_dict, json_fid, indent=4)

    logger.info('log file: ' + logfile)
    logger.info('------------ FINISHED! ------------')
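A minimal sketch of typical bidscoiner() calls (the paths and subject labels are hypothetical):

# Hypothetical call: convert all (new) subjects in the raw folder using bidsfolder/code/bidsmap.yaml
bidscoiner('/project/raw', '/project/bids')

# Hypothetical call: reprocess two selected subjects, ignoring any existing output folders
bidscoiner('/project/raw', '/project/bids', subjects=('sub-001', 'sub-002'), force=True)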
Example 5
def coin_dicom(session: str, bidsmap: dict, bidsfolder: str, personals: dict,
               subprefix: str, sesprefix: str) -> None:
    """
    Converts the session dicom-files into BIDS-valid nifti-files in the corresponding bidsfolder and
    extracts personals (e.g. Age, Sex) from the dicom header

    :param session:     The full-path name of the subject/session source folder
    :param bidsmap:     The full mapping heuristics from the bidsmap YAML-file
    :param bidsfolder:  The full-path name of the BIDS root-folder
    :param personals:   The dictionary with the personal information
    :param subprefix:   The prefix common for all source subject-folders
    :param sesprefix:   The prefix common for all source session-folders
    :return:            Nothing
    """

    if not bids.lsdirs(session):
        logger.warning('No series subfolder(s) found in: ' + session)
        return

    TE = [None, None]

    # Get a valid BIDS subject identifier from the (first) dicom-header or from the session source folder
    if bidsmap['DICOM']['participant_label'] and bidsmap['DICOM']['participant_label'].startswith('<<') and bidsmap['DICOM']['participant_label'].endswith('>>'):
        subid = bids.get_dicomfield(bidsmap['DICOM']['participant_label'][2:-2], bids.get_dicomfile(bids.lsdirs(session)[0]))
    elif bidsmap['DICOM']['participant_label']:
        subid = bidsmap['DICOM']['participant_label']
    else:
        subid = session.rsplit(os.sep + subprefix, 1)[1].split(os.sep + sesprefix, 1)[0]
    subid = 'sub-' + bids.cleanup_label(subid[len(subprefix):] if subid.startswith(subprefix) else subid)   # NB: str.lstrip(subprefix) would strip characters, not the prefix
    if subid == 'sub-':
        logger.error('Error: No valid subject identifier found for: ' + session)
        return

    # Get a valid or empty BIDS session identifier from the (first) dicom-header or from the session source folder
    if bidsmap['DICOM']['session_label'] and bidsmap['DICOM']['session_label'].startswith('<<') and bidsmap['DICOM']['session_label'].endswith('>>'):
        sesid = bids.get_dicomfield(bidsmap['DICOM']['session_label'][2:-2], bids.get_dicomfile(bids.lsdirs(session)[0]))
    elif bidsmap['DICOM']['session_label']:
        sesid = bidsmap['DICOM']['session_label']
    elif os.sep + sesprefix in session:
        sesid = session.rsplit(os.sep + sesprefix)[1]
    else:
        sesid = ''
    if sesid:
        sesid = 'ses-' + bids.cleanup_label(sesid[len(sesprefix):] if sesid.startswith(sesprefix) else sesid)

    # Create the BIDS session-folder and a scans.tsv file
    bidsses = os.path.join(bidsfolder, subid, sesid)    # NB: This gives a trailing '/' if ses=='', but that should be ok
    os.makedirs(bidsses, exist_ok=True)
    scans_tsv = os.path.join(bidsses, f'{subid}{bids.add_prefix("_", sesid)}_scans.tsv')
    if os.path.exists(scans_tsv):
        scans_table = pd.read_csv(scans_tsv, sep='\t', index_col='filename')
    else:
        scans_table = pd.DataFrame(columns=['acq_time'], dtype='str')
        scans_table.index.name = 'filename'

    # Process all the dicom series subfolders
    for series in bids.lsdirs(session):

        if os.path.basename(series).startswith('.'):    # NB: series is a full path, so check the basename
            logger.info('Ignoring hidden dicom-folder: ' + series)
            continue
        else:
            logger.info('Processing dicom-folder: ' + series)

        # Get the cleaned-up bids labels from a dicom-file and bidsmap
        dicomfile = bids.get_dicomfile(series)
        if not dicomfile:
            continue
        result = bids.get_matching_dicomseries(dicomfile, bidsmap)
        series_ = result['series']
        modality = result['modality']

        # Create the BIDS session/modality folder
        bidsmodality = os.path.join(bidsses, modality)
        os.makedirs(bidsmodality, exist_ok=True)

        # Compose the BIDS filename using the bids labels and run-index
        runindex = series_['bids']['run_index']
        if runindex.startswith('<<') and runindex.endswith('>>'):
            bidsname = bids.get_bidsname(subid, sesid, modality, series_,
                                         runindex[2:-2])
            bidsname = bids.increment_runindex(bidsmodality, bidsname)
        else:
            bidsname = bids.get_bidsname(subid, sesid, modality, series_,
                                         runindex)

        # Convert the dicom-files in the series folder to nifti's in the BIDS-folder
        command = '{path}dcm2niix {args} -f "{filename}" -o "{outfolder}" "{infolder}"'.format(
            path=bidsmap['Options']['dcm2niix']['path'],
            args=bidsmap['Options']['dcm2niix']['args'],
            filename=bidsname,
            outfolder=bidsmodality,
            infolder=series)
        logger.info('$ ' + command)
        process = subprocess.run(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)     # TODO: investigate shell=False and capture_output=True
        logger.info(process.stdout.decode('utf-8'))
        if process.returncode != 0:
            errormsg = f'Error: Failed to process {series} (errorcode {process.returncode})'
            logger.error(errormsg)
            continue

        # Replace uncropped output image with the cropped one
        if '-x y' in bidsmap['Options']['dcm2niix']['args']:
            for filename in sorted(glob.glob(os.path.join(bidsmodality, bidsname + '*_Crop_*'))):   # e.g. *_Crop_1.nii.gz
                basepath, ext1 = os.path.splitext(filename)
                basepath, ext2 = os.path.splitext(basepath)     # Account for .nii.gz files
                basepath       = basepath.rsplit('_Crop_', 1)[0]
                newfilename    = basepath + ext2 + ext1
                logger.info(f'Found dcm2niix _Crop_ suffix, replacing original file\n{filename} ->\n{newfilename}')
                os.replace(filename, newfilename)

        # Rename all files ending with _c%d, _e%d and _ph (and any combination of these): These are produced by dcm2niix for multi-coil data, multi-echo data and phase data, respectively
        jsonfiles = []                          # Collect the associated json-files (for updating them later) -- possibly > 1
        for suffix in ('_c', '_e', '_ph', '_i'):
            for filename in sorted(glob.glob(os.path.join(bidsmodality, bidsname + suffix + '[0-9]*'))):
                basepath, ext1  = os.path.splitext(filename)
                basepath, ext2  = os.path.splitext(basepath)    # Account for .nii.gz files
                basepath, index = basepath.rsplit(suffix, 1)
                index           = index.split('_')[0].zfill(2)  # Zero-pad as specified in the BIDS-standard (assuming two digits is sufficient); strip following suffixes (fieldmaps produce *_e2_ph files)

                # This is a special hack: dcm2niix does not always add a _c/_e suffix for the first(?) coil/echo image -> add it when we encounter a **_e2/_c2 file
                if suffix in ('_c', '_e') and int(index) == 2 and basepath.rsplit('_', 1)[1] != 'magnitude1':   # For fieldmaps: *_magnitude1_e[index] -> *_magnitude[index] (This is handled below)
                    filename_ce = basepath + ext2 + ext1        # The file without the _c1/_e1 suffix
                    if suffix == '_e' and bids.set_bidslabel(basepath, 'echo'):
                        newbasepath_ce = bids.set_bidslabel(basepath, 'echo', '1')
                    else:
                        newbasepath_ce = bids.set_bidslabel(basepath, 'dummy', suffix.upper() + '1'.zfill(len(index)))  # --> append to acq-label, may need to be elaborated for future BIDS standards, supporting multi-coil data
                    newfilename_ce = newbasepath_ce + ext2 + ext1   # The file as it should have been
                    if os.path.isfile(filename_ce):
                        if filename_ce != newfilename_ce:
                            logger.info(f'Found no dcm2niix {suffix} suffix for image instance 1, renaming\n{filename_ce} ->\n{newfilename_ce}')
                            os.rename(filename_ce, newfilename_ce)
                        if ext1 == '.json':
                            jsonfiles.append(newbasepath_ce + '.json')

                # Patch the basepath with the suffix info
                if suffix == '_e' and bids.set_bidslabel(basepath, 'echo') and index:
                    basepath = bids.set_bidslabel(basepath, 'echo', str(int(index)))    # In contrast to other labels, run and echo labels MUST be integers. Those labels MAY include zero padding, but this is NOT RECOMMENDED to maintain their uniqueness

                elif suffix == '_e' and basepath.rsplit('_', 1)[1] in ('magnitude1', 'magnitude2') and index:   # i.e. modality == 'fmap'
                    basepath = basepath[0:-1] + str(int(index))     # basepath: *_magnitude1_e[index] -> *_magnitude[index]
                    # Read the echo times that need to be added to the json-file (see below)
                    if os.path.splitext(filename)[1] == '.json':
                        with open(filename, 'r') as json_fid:
                            data = json.load(json_fid)
                        TE[int(index) - 1] = data['EchoTime']
                        logger.info(f"Reading EchoTime{index} = {data['EchoTime']} from: {filename}")

                elif suffix == '_e' and basepath.rsplit('_', 1)[1] == 'phasediff' and index:    # i.e. modality == 'fmap'
                    pass

                elif suffix == '_ph' and basepath.rsplit('_', 1)[1] in ['phase1', 'phase2'] and index:  # i.e. modality == 'fmap' (TODO: untested)
                    basepath = basepath[0:-1] + str(int(index))     # basepath: *_phase1_e[index] -> *_phase[index]
                    logger.warning('Untested dcm2niix "_ph"-filetype: ' + basepath)

                else:
                    basepath = bids.set_bidslabel(basepath, 'dummy', suffix.upper() + index)    # --> append to acq-label, may need to be elaborated for future BIDS standards, supporting multi-coil data

                # Save the file with a new name
                if runindex.startswith('<<') and runindex.endswith('>>'):
                    newbidsname = bids.increment_runindex(bidsmodality, os.path.basename(basepath), ext2 + ext1)    # Update the runindex now that the acq-label has changed
                else:
                    newbidsname = os.path.basename(basepath)
                newfilename = os.path.join(bidsmodality, newbidsname + ext2 + ext1)
                logger.info(f'Found dcm2niix {suffix} suffix, renaming\n{filename} ->\n{newfilename}')
                os.rename(filename, newfilename)
                if ext1 == '.json':
                    jsonfiles.append(os.path.join(bidsmodality, newbidsname + '.json'))

        # Loop over and adapt all the newly produced json files and write to the scans.tsv file (every nifti-file comes with a json-file)
        if not jsonfiles:
            jsonfiles = [os.path.join(bidsmodality, bidsname + '.json')]
        for jsonfile in set(jsonfiles):

            # Check if dcm2niix behaved as expected
            if not os.path.isfile(jsonfile):
                logger.warning(f'Unexpected file conversion result: {jsonfile} not found')
                continue

            # Add a dummy b0 bval- and bvec-file for any file without a bval/bvec file (e.g. sbref, b0 scans)
            if modality == 'dwi':
                bvecfile = os.path.splitext(jsonfile)[0] + '.bvec'
                bvalfile = os.path.splitext(jsonfile)[0] + '.bval'
                if not os.path.isfile(bvecfile):
                    logger.info('Adding dummy bvec file: ' + bvecfile)
                    with open(bvecfile, 'w') as bvec_fid:
                        bvec_fid.write('0\n0\n0\n')
                if not os.path.isfile(bvalfile):
                    logger.info('Adding dummy bval file: ' + bvalfile)
                    with open(bvalfile, 'w') as bval_fid:
                        bval_fid.write('0\n')

            # Add the TaskName to the func json-file
            elif modality == 'func':
                with open(jsonfile, 'r') as json_fid:
                    data = json.load(json_fid)
                if 'TaskName' not in data:
                    logger.info('Adding TaskName to: ' + jsonfile)
                    data['TaskName'] = series_['bids']['task_label']
                    with open(jsonfile, 'w') as json_fid:
                        json.dump(data, json_fid, indent=4)

            # Add the EchoTime(s) used to create the difference image to the fmap json-file. NB: This assumes the magnitude series have already been parsed (i.e. their nifti's had an _e suffix) -- This is normally the case for Siemens (phase-series being saved after the magnitude series)
            elif modality == 'fmap':
                if series_['bids']['suffix'] == 'phasediff':
                    logger.info('Adding EchoTime1 and EchoTime2 to: ' + jsonfile)
                    with open(jsonfile, 'r') as json_fid:
                        data = json.load(json_fid)
                    data['EchoTime1'] = TE[0]
                    data['EchoTime2'] = TE[1]
                    with open(jsonfile, 'w') as json_fid:
                        json.dump(data, json_fid, indent=4)
                    if TE[0] > TE[1]:
                        logger.warning('EchoTime1 > EchoTime2 in: ' + jsonfile)

            # Parse the acquisition time from the json file
            with open(jsonfile, 'r') as json_fid:
                data = json.load(json_fid)
            acq_time = dateutil.parser.parse(data['AcquisitionTime'])
            niipath  = glob.glob(os.path.splitext(jsonfile)[0] + '.nii*')[0]    # Find the corresponding nifti file (there should be only one, let's not make assumptions about the .gz extension)
            niipath  = niipath.replace(bidsses + os.sep, '')                    # Use a relative path. Somehow .strip(bidsses) instead of replace(bidsses,'') does not work properly
            scans_table.loc[niipath, 'acq_time'] = '1900-01-01T' + acq_time.strftime('%H:%M:%S')

    # Write the scans_table to disk
    logger.info('Writing acquisition time data to: ' + scans_tsv)
    scans_table.to_csv(scans_tsv, sep='\t', encoding='utf-8')

    # Search for the IntendedFor images and add them to the json-files. This has been postponed until all modalities have been processed (i.e. so that all target images are indeed on disk)
    if bidsmap['DICOM']['fmap'] is not None:
        for fieldmap in bidsmap['DICOM']['fmap']:
            if 'IntendedFor' in fieldmap['bids'] and fieldmap['bids']['IntendedFor']:
                bidsname = bids.get_bidsname(subid, sesid, 'fmap', fieldmap, '1')
                acqlabel = bids.set_bidslabel(bidsname, 'acq')
                for jsonfile in glob.glob(os.path.join(bidsses, 'fmap', bidsname.replace('_run-1_', '_run-[0-9]*_').replace(acqlabel, acqlabel + '[CE][0-9]*') + '.json')):     # Account for multiple runs and dcm2niix suffixes inserted into the acquisition label

                    intendedfor = fieldmap['bids']['IntendedFor']
                    if intendedfor.startswith('<<') and intendedfor.endswith('>>'):
                        intendedfor = intendedfor[2:-2].split('><')
                    else:
                        intendedfor = [intendedfor]

                    niifiles = []
                    for selector in intendedfor:
                        niifiles.extend([niifile.split(os.sep + subid + os.sep, 1)[1].replace('\\', '/')
                                         for niifile in sorted(glob.glob(os.path.join(bidsses, f'**{os.sep}*{selector}*.nii*')))])  # Search in all series using a relative path

                    with open(jsonfile, 'r') as json_fid:
                        data = json.load(json_fid)
                    data['IntendedFor'] = niifiles
                    logger.info('Adding IntendedFor to: ' + jsonfile)
                    with open(jsonfile, 'w') as json_fid:
                        json.dump(data, json_fid, indent=4)

                # Catch magnitude2 files produced by dcm2niix (i.e. magnitude1 & magnitude2 both in the same seriesfolder)
                if jsonfile.endswith('magnitude1.json'):
                    jsonfile2 = jsonfile.rsplit('1.json', 1)[0] + '2.json'
                    if os.path.isfile(jsonfile2):

                        with open(jsonfile2, 'r') as json_fid:
                            data = json.load(json_fid)
                        if 'IntendedFor' not in data:
                            data['IntendedFor'] = niifiles
                            logger.info('Adding IntendedFor to: ' + jsonfile2)
                            with open(jsonfile2, 'w') as json_fid:
                                json.dump(data, json_fid, indent=4)

    # Collect personal data from the DICOM header (NB: from the last processed series)
    dicomfile = bids.get_dicomfile(series)
    personals['participant_id'] = subid
    if sesid:
        personals['session_id'] = sesid
    personals['age'] = bids.get_dicomfield('PatientAge', dicomfile)
    personals['sex'] = bids.get_dicomfield('PatientSex', dicomfile)
    personals['size'] = bids.get_dicomfield('PatientSize', dicomfile)
    personals['weight'] = bids.get_dicomfield('PatientWeight', dicomfile)
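coin_dicom() is normally called once per session from bidscoiner(); a hedged sketch of a direct call, assuming bidsmap holds the heuristics dictionary loaded from the bidsmap YAML-file (e.g. via bids.get_heuristics(), as in the bidscoiner example above) and with illustrative paths:

personals = dict()
coin_dicom('/project/raw/sub-001/ses-01', bidsmap, '/project/bids', personals, 'sub-', 'ses-')
print(personals)    # e.g. {'participant_id': 'sub-001', 'session_id': 'ses-01', 'age': '035Y', ...}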
Example 6
def bidsmapper(rawfolder, bidsfolder, bidsmapfile='bidsmap_sample.yaml', automatic=False):
    """
    Main function that processes all the subjects and sessions in the rawfolder
    and that generates a maximally filled-in bidsmap.yaml file in bidsfolder/code.
    Folders in rawfolder are assumed to contain a single dataset.

    :param str rawfolder:       The root folder-name of the sub/ses/data/file tree containing the source data files
    :param str bidsfolder:      The name of the BIDS root folder
    :param str bidsmapfile:     The name of the bidsmap YAML-file
    :param bool automatic:      If True, the user will not be asked for help if an unknown series is encountered
    :return: str bidsmapfile:   The name of the mapped bidsmap YAML-file
    :rtype: str
    """

    # Input checking
    rawfolder  = os.path.abspath(os.path.expanduser(rawfolder))
    bidsfolder = os.path.abspath(os.path.expanduser(bidsfolder))

    # Get the heuristics for creating the bidsmap
    heuristics = bids.get_heuristics(bidsmapfile, os.path.join(bidsfolder,'code'))

    # Create a copy / bidsmap skeleton with no modality entries (i.e. bidsmap with empty lists)
    bidsmap = copy.deepcopy(heuristics)
    for logic in ('DICOM', 'PAR', 'P7', 'Nifti', 'FileSystem'):
        for modality in bids.bidsmodalities + (bids.unknownmodality,):

            if bidsmap[logic] and modality in bidsmap[logic]:
                bidsmap[logic][modality] = None

    # Loop over all subjects and sessions and build up the bidsmap entries
    subjects = bids.lsdirs(rawfolder, 'sub-*')
    for subject in subjects:

        sessions = bids.lsdirs(subject, 'ses-*')
        if not sessions: sessions = [subject]
        for session in sessions:

            print('Parsing: ' + session)

            for series in bids.lsdirs(session):

                # Update / append the dicom mapping
                if heuristics['DICOM']:
                    dicomfile = bids.get_dicomfile(series)
                    bidsmap   = built_dicommap(dicomfile, bidsmap, heuristics, automatic)

                # Update / append the PAR/REC mapping
                if heuristics['PAR']:
                    parfile   = bids.get_parfile(series)
                    bidsmap   = built_parmap(parfile, bidsmap, heuristics, automatic)

                # Update / append the P7 mapping
                if heuristics['P7']:
                    p7file    = bids.get_p7file(series)
                    bidsmap   = built_p7map(p7file, bidsmap, heuristics, automatic)

                # Update / append the nifti mapping
                if heuristics['Nifti']:
                    niftifile = bids.get_niftifile(series)
                    bidsmap   = built_niftimap(niftifile, bidsmap, heuristics, automatic)

                # Update / append the file-system mapping
                if heuristics['FileSystem']:
                    bidsmap   = built_filesystemmap(series, bidsmap, heuristics, automatic)

                # Update / append the plugin mapping
                if heuristics['PlugIn']:
                    bidsmap   = built_pluginmap(series, bidsmap)

    # Create the bidsmap YAML-file in bidsfolder/code
    os.makedirs(os.path.join(bidsfolder,'code'), exist_ok=True)
    bidsmapfile = os.path.join(bidsfolder,'code','bidsmap.yaml')

    # Save the bidsmap to the bidsmap YAML-file
    print('Writing bidsmap to: ' + bidsmapfile)
    with open(bidsmapfile, 'w') as stream:
        yaml.dump(bidsmap, stream)

    return bidsmapfile
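A minimal usage sketch (the paths are hypothetical); the returned value is the path of the generated bidsmap YAML-file:

# Hypothetical call: build bidsfolder/code/bidsmap.yaml from the raw data without asking for user input
bidsmapfile = bidsmapper('/project/raw', '/project/bids', automatic=True)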
Example 7
def rawmapper(rawfolder: str, outfolder: str='', sessions: list=[], rename: bool=False,
              dicomfield: tuple=('PatientComments',), wildcard: str='*',
              subprefix: str='sub-', sesprefix: str='ses-', dryrun: bool=False) -> None:
    """
    :param rawfolder:   The root folder-name of the sub/ses/data/file tree containing the source data files
    :param outfolder:   The name of the folder where the mapping-file is saved (default = rawfolder)
    :param sessions:    Space separated list of selected sub-#/ses-# names / folders to be processed. Otherwise all sessions in the bidsfolder will be selected
    :param rename:      Flag for renaming the sub-subid folders to sub-dicomfield
    :param dicomfield:  The names of the dicomfields that are mapped (/ renamed to sub-dcmval/ses-dcmval)
    :param wildcard:    The Unix style pathname pattern expansion that is used by glob to select the series from which the dicomfield is being mapped
    :param subprefix:   The prefix common for all source subject-folders
    :param sesprefix:   The prefix common for all source session-folders
    :param dryrun:      Flag for dry-running renaming the sub-subid folders
    :return:            Nothing
    """

    # Input checking
    if not outfolder:
        outfolder = rawfolder
    rawfolder = os.path.abspath(os.path.expanduser(rawfolder))
    outfolder = os.path.abspath(os.path.expanduser(outfolder))

    # Create or append the output to a mapper logfile
    if not dryrun:
        mapperfile = os.path.join(outfolder, 'rawmapper_{}.tsv'.format('_'.join(dicomfield)))
        if rename:
            with open(mapperfile, 'a') as fid:
                fid.write('{}\t{}\t{}\t{}\n'.format('subid', 'sesid', 'newsubid', 'newsesid'))
        else:
            with open(mapperfile, 'x') as fid:
                fid.write('{}\t{}\t{}\t{}\n'.format('subid', 'sesid', 'seriesname', '\t'.join(dicomfield)))

    # Map the sessions in the rawfolder
    if not sessions:
        sessions = glob.glob(os.path.join(rawfolder, f'{subprefix}*{os.sep}{sesprefix}*'))
        if not sessions:
            sessions = glob.glob(os.path.join(rawfolder, f'{subprefix}*'))      # Try without session-subfolders
    else:
        sessions = [sessionitem for session in sessions for sessionitem in glob.glob(os.path.join(rawfolder, session), recursive=True)]

    # Loop over the selected sessions in the rawfolder
    for session in sessions:

        # Get the subject and session identifiers from the raw folder
        subid = subprefix + session.rsplit(os.sep+subprefix, 1)[1].split(os.sep+sesprefix, 1)[0]
        sesid = sesprefix + session.rsplit(os.sep+sesprefix)[1]                                         # TODO: Fix crashing on session-less datasets

        # Parse the new subject and session identifiers from the dicomfield
        series = bids.lsdirs(session, wildcard)
        if not series:
            series = ''
            dcmval = ''
        else:
            series = series[0]                                                                          # TODO: loop over series?
            dcmval = ''
            for dcmfield in dicomfield:
                dcmval = dcmval + '/' + str(bids.get_dicomfield(dcmfield, bids.get_dicomfile(series)))
            dcmval = dcmval[1:]

        # Rename the session subfolder in the rawfolder and print & save this info
        if rename:

            # Get the new subid and sesid
            if not dcmval or dcmval=='None':
                warnings.warn('Skipping renaming because the dicom-field was empty for: ' + session)
                continue
            else:
                if '/' in dcmval:               # Allow for different sub/ses delimiters that could be entered at the console (i.e. in PatientComments)
                    delim = '/'
                elif '\\' in dcmval:
                    delim = '\\'
                else:
                    delim = '\r\n'
                newsubsesid = [val for val in dcmval.split(delim) if val]   # Skip empty lines / entries
                newsubid    = subprefix + bids.cleanup_label(newsubsesid[0][len(subprefix):] if newsubsesid[0].startswith(subprefix) else newsubsesid[0])   # NB: str.lstrip() strips characters, not a prefix
                if newsubid==subprefix or newsubid==subprefix+'None':
                    newsubid = subid
                    warnings.warn('Could not rename {} because the dicom-field was empty for: {}'.format(subid, session))
                if len(newsubsesid)==1:
                    newsesid = sesid
                elif len(newsubsesid)==2:
                    newsesid = sesprefix + bids.cleanup_label(newsubsesid[1][len(sesprefix):] if newsubsesid[1].startswith(sesprefix) else newsubsesid[1])
                else:
                    warnings.warn('Skipping renaming of {} because the dicom-field "{}" could not be parsed into [subid, sesid]'.format(session, dcmval))
                    continue
                if newsesid==sesprefix or newsesid==sesprefix+'None':
                    newsesid = sesid
                    warnings.warn('Could not rename {} because the dicom-field was empty for: {}'.format(sesid, session))

            # Save the dicomfield / sub-ses mapping to disk and rename the session subfolder (but skip if it already exists)
            newsession = os.path.join(rawfolder, newsubid, newsesid)
            print(session + ' -> ' + newsession)
            if newsession == session:
                continue
            if os.path.isdir(newsession):
                warnings.warn('{} already exists, skipping renaming of {}'.format(newsession, session))
            elif not dryrun:
                with open(mapperfile, 'a') as fid:      # NB: mapperfile is already a full path
                    fid.write('{}\t{}\t{}\t{}\n'.format(subid, sesid, newsubid, newsesid))
                os.renames(session, newsession)

        # Print & save the dicom values
        else:
            print('{}{}{}\t-> {}'.format(subid+os.sep, sesid+os.sep, os.path.basename(series), '\t'.join(dcmval.split('/'))))
            if not dryrun:
                with open(mapperfile, 'a') as fid:
                    fid.write('{}\t{}\t{}\t{}\n'.format(subid, sesid, os.path.basename(series), '\t'.join(dcmval.split('/'))))
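Two hedged usage sketches (the raw folder and the dicomfields are illustrative):

# Hypothetical call: print the EchoTime of the first series of every session, without writing anything
rawmapper('/project/raw', dicomfield=('EchoTime',), dryrun=True)

# Hypothetical call: rename the sub-/ses-folders according to what was entered in the PatientComments field
rawmapper('/project/raw', rename=True, dicomfield=('PatientComments',))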
Example 8
def sortsessions(session: Path,
                 subprefix: str = '',
                 sesprefix: str = '',
                 dicomfield: str = 'SeriesDescription',
                 rename: bool = False,
                 ext: str = '',
                 nosort: bool = False,
                 pattern: str = r'.*\.(IMA|dcm)$',
                 dryrun: bool = False) -> None:
    """

    :param session:     The root folder containing the source [sub/][ses/]dicomfiles or the DICOMDIR file
    :param subprefix:   The prefix for searching the sub folders in session
    :param sesprefix:   The prefix for searching the ses folders in sub folder
    :param dicomfield:  The dicomfield that is used to construct the series folder name (e.g. SeriesDescription or ProtocolName, which are both used as fallback)
    :param rename:      Boolean to rename the DICOM files to a PatientName_SeriesNumber_SeriesDescription_AcquisitionNumber_InstanceNumber scheme
    :param ext:         The file extension after sorting (empty value keeps original file extension)
    :param nosort:      Boolean to skip sorting of DICOM files into SeriesNumber-SeriesDescription directories (useful in combination with -r for renaming only)
    :param pattern:     The regular expression pattern used in re.match() to select the dicom files
    :param dryrun:      Boolean to just display the action
    :return:            Nothing
    """

    # Input checking
    session = Path(session)

    # Start logging
    bids.setup_logging()

    # Do a recursive call if subprefix is given
    if subprefix:

        for subfolder in bids.lsdirs(session, subprefix + '*'):
            if sesprefix:
                sessionfolders = bids.lsdirs(subfolder, sesprefix + '*')
            else:
                sessionfolders = [subfolder]

            for sessionfolder in sessionfolders:
                sortsessions(session=sessionfolder,
                             dicomfield=dicomfield,
                             rename=rename,
                             ext=ext,
                             nosort=nosort,
                             pattern=pattern,
                             dryrun=dryrun)

    # Use the DICOMDIR file if it is there
    if (session / 'DICOMDIR').is_file():

        dicomdir = pydicom.dcmread(str(session / 'DICOMDIR'))

        sessionfolder = session
        for patient in dicomdir.patient_records:
            if len(dicomdir.patient_records) > 1:
                sessionfolder = session / f"sub-{cleanup(patient.PatientName)}"

            for n, study in enumerate(patient.children, 1):     # TODO: Check order
                if len(patient.children) > 1:
                    sessionfolder = session / f"ses-{n:02}{cleanup(study.StudyDescription)}"    # TODO: Leave out StudyDescription? Include PatientName/StudiesDescription?
                    LOGGER.warning(f"The session index-number '{n:02}' is not necessarily meaningful: {sessionfolder}")

                dicomfiles = [
                    session.joinpath(*image.ReferencedFileID)
                    for series in study.children for image in series.children
                ]
                sortsession(sessionfolder, dicomfiles, dicomfield, rename, ext,
                            nosort, dryrun)

    else:

        dicomfiles = [
            dcmfile for dcmfile in session.iterdir()
            if dcmfile.is_file() and re.match(pattern, str(dcmfile))
        ]
        sortsession(session, dicomfiles, dicomfield, rename, ext, nosort,
                    dryrun)
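A hedged sketch for this Path-based variant; the folder is hypothetical and the bidscoin helper modules (bids, cleanup, sortsession) are assumed to be importable:

from pathlib import Path

# Hypothetical call: dry-run first, sorting by ProtocolName instead of SeriesDescription
sortsessions(Path('/project/raw'), subprefix='sub-', sesprefix='ses-',
             dicomfield='ProtocolName', dryrun=True)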
Example 9
def coin_dicom(session, bidsmap, bidsfolder, personals):
    """
    Converts the session dicom-files into BIDS-valid nifti-files in the corresponding bidsfolder and
    extracts personals (e.g. Age, Sex) from the dicom header

    :param str session:    The full-path name of the subject/session source folder
    :param dict bidsmap:   The full mapping heuristics from the bidsmap YAML-file
    :param str bidsfolder: The full-path name of the BIDS root-folder
    :param dict personals: The dictionary with the personal information
    :return:               Nothing
    :rtype: NoneType
    """

    global logfile
    TE = [None, None]

    # Get a valid BIDS subject identifier from the (first) dicom-header or from the session source folder
    if bidsmap['DICOM']['participant_label'] and bidsmap['DICOM']['participant_label'].startswith('<<') and bidsmap['DICOM']['participant_label'].endswith('>>'):
        subid = 'sub-' + bids.get_dicomfield(bidsmap['DICOM']['participant_label'][2:-2], bids.get_dicomfile(bids.lsdirs(session)[0]))
    elif bidsmap['DICOM']['participant_label']:
        subid = 'sub-' + bidsmap['DICOM']['participant_label']
    else:
        subid = 'sub-' + session.rsplit(os.sep + 'sub-', 1)[1].split(os.sep + 'ses-', 1)[0]
    if subid == 'sub-':
        bids.printlog('Error: No valid subject identifier found for: ' + session, logfile)
        return

    # Get a BIDS session identifier from the (first) dicom-header or from the session source folder
    if bidsmap['DICOM']['session_label'] and bidsmap['DICOM']['session_label'].startswith('<<') and bidsmap['DICOM']['session_label'].endswith('>>'):
        sesid = 'ses-' + bids.get_dicomfield(bidsmap['DICOM']['session_label'][2:-2], bids.get_dicomfile(bids.lsdirs(session)[0]))
    elif bidsmap['DICOM']['session_label']:
        sesid = 'ses-' + bidsmap['DICOM']['session_label']
    elif os.sep + 'ses-' in session:
        sesid = 'ses-' + session.rsplit(os.sep + 'ses-')[1]
    else:
        sesid = ''

    # Create the BIDS session-folder
    bidsses = os.path.join(bidsfolder, subid, sesid)    # NB: This gives a trailing '/' if ses=='', but that should be ok
    os.makedirs(bidsses, exist_ok=True)

    # Process all the dicom series subfolders
    for series in bids.lsdirs(session):

        bids.printlog('Processing dicom-folder: ' + series, logfile)

        # Get the cleaned-up bids labels from a dicom-file and bidsmap
        dicomfile = bids.get_dicomfile(series)
        result = bids.get_matching_dicomseries(dicomfile, bidsmap)
        series_ = result['series']
        modality = result['modality']

        # Create the BIDS session/modality folder
        bidsmodality = os.path.join(bidsses, modality)
        os.makedirs(bidsmodality, exist_ok=True)

        # Compose the BIDS filename using the bids labels and run-index
        runindex = series_['run_index']
        if runindex.startswith('<<') and runindex.endswith('>>'):
            bidsname = bids.get_bidsname(subid, sesid, modality, series_,
                                         runindex[2:-2])
            bidsname = bids.increment_runindex(bidsmodality, bidsname)
        else:
            bidsname = bids.get_bidsname(subid, sesid, modality, series_,
                                         runindex)

        # Convert the dicom-files in the series folder to nifti's in the BIDS-folder
        command = '{path}dcm2niix {args} -f "{filename}" -o "{outfolder}" "{infolder}"'.format(
            path=bidsmap['Options']['dcm2niix']['path'],
            args=bidsmap['Options']['dcm2niix']['args'],
            filename=bidsname,
            outfolder=bidsmodality,
            infolder=series)
        bids.printlog('$ ' + command, logfile)
        process = subprocess.run(
            command,
            shell=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT
        )  # TODO: investigate shell=False and capture_output=True
        bids.printlog(process.stdout.decode('utf-8'), logfile)
        if process.returncode != 0:
            errormsg = 'Error: Failed to process {} (errorcode {})'.format(
                series, process.returncode)
            bids.printlog(errormsg, logfile)
            continue

        # Rename all files ending with _c%d, _e%d and _ph: These are produced by dcm2niix for multi-coil data, multi-echo data and phase data, respectively
        jsonfiles = []  # Collect the associated json-files (for updating them later)
        for suffix in ('_c', '_e', '_ph'):
            for filename in sorted(
                    glob.glob(
                        os.path.join(bidsmodality, bidsname + suffix + '*'))):
                basepath, ext1 = os.path.splitext(filename)
                basepath, ext2 = os.path.splitext(
                    basepath)  # Account for .nii.gz files
                basepath, index = basepath.rsplit(suffix, 1)

                if suffix == '_e' and bids.set_bidslabel(basepath,
                                                         'echo') and index:
                    basepath = bids.set_bidslabel(basepath, 'echo', index)

                elif suffix == '_e' and basepath.rsplit('_', 1)[1] in [
                        'magnitude1', 'magnitude2'
                ] and index:  # i.e. modality == 'fmap'
                    basepath = basepath[0:-1] + index
                    # Read the echo times that need to be added to the json-file (see below)
                    if os.path.splitext(filename)[1] == '.json':
                        with open(filename, 'r') as json_fid:
                            data = json.load(json_fid)
                        TE[int(index) - 1] = data['EchoTime']
                        bids.printlog(
                            'Reading EchoTime{} = {} from: {}'.format(
                                index, data['EchoTime'], filename), logfile)
                elif suffix == '_e' and basepath.rsplit('_', 1)[1] == 'phasediff' and index:  # i.e. modality == 'fmap'
                    pass

                elif suffix == '_ph' and basepath.rsplit('_', 1)[1] in [
                        'phase1', 'phase2'
                ] and index:  # i.e. modality == 'fmap' (TODO: untested)
                    basepath = basepath[0:-1] + index
                    bids.printlog(
                        'WARNING: Untested dcm2niix "_ph"-filetype: ' +
                        basepath, logfile)

                else:
                    basepath = bids.set_bidslabel(
                        basepath, 'dummy',
                        suffix.upper() + index
                    )  # --> append to acq-label, may need to be elaborated for future BIDS standards, supporting multi-coil data

                if runindex.startswith('<<') and runindex.endswith('>>'):
                    newbidsname = bids.increment_runindex(
                        bidsmodality, os.path.basename(basepath), ext2 + ext1
                    )  # Update the runindex now that the acq-label has changed
                else:
                    newbidsname = os.path.basename(basepath)
                newfilename = os.path.join(bidsmodality,
                                           newbidsname + ext2 + ext1)
                bids.printlog(
                    'Found dcm2niix {} suffix, renaming\n{} ->\n{}'.format(
                        suffix, filename, newfilename), logfile)
                os.rename(filename, newfilename)
                if ext1 == '.json':
                    jsonfiles.append(
                        os.path.join(bidsmodality, newbidsname + '.json'))

        # Loop over and adapt all the newly produced json files (every nifti file comes with a json file)
        if not jsonfiles:
            jsonfiles = [os.path.join(bidsmodality, bidsname + '.json')]

        for jsonfile in jsonfiles:

            # Add a dummy b0 bval- and bvec-file for any file without a bval/bvec file (e.g. sbref, b0 scans)
            if modality == 'dwi':
                bvecfile = os.path.splitext(jsonfile)[0] + '.bvec'
                bvalfile = os.path.splitext(jsonfile)[0] + '.bval'
                if not os.path.isfile(bvecfile):
                    with open(bvecfile, 'w') as bvec_fid:
                        bids.printlog('Adding dummy bvec file: ' + bvecfile,
                                      logfile)
                        bvec_fid.write('0\n0\n0\n')
                if not os.path.isfile(bvalfile):
                    with open(bvalfile, 'w') as bval_fid:
                        bids.printlog('Adding dummy bval file: ' + bvalfile,
                                      logfile)
                        bval_fid.write('0\n')

            # Add the TaskName to the func json-file
            elif modality == 'func':
                with open(jsonfile, 'r') as json_fid:
                    data = json.load(json_fid)
                if 'TaskName' not in data:
                    bids.printlog('Adding TaskName to: ' + jsonfile, logfile)
                    with open(jsonfile, 'w') as json_fid:
                        data['TaskName'] = series_['task_label']
                        json.dump(data, json_fid, indent=4)

            # Add the EchoTime(s) used to create the difference image to the fmap json-file. NB: This assumes the magnitude series have already been parsed (i.e. their nifti's had an _e suffix) -- this is normally the case for Siemens (the phase-series being saved after the magnitude series)
            elif modality == 'fmap':
                if series_['suffix'] == 'phasediff':
                    bids.printlog(
                        'Adding EchoTime1 and EchoTime2 to: ' + jsonfile,
                        logfile)
                    with open(jsonfile, 'r') as json_fid:
                        data = json.load(json_fid)
                    data['EchoTime1'] = TE[0]
                    data['EchoTime2'] = TE[1]
                    with open(jsonfile, 'w') as json_fid:
                        json.dump(data, json_fid, indent=4)
                    if None not in TE and TE[0] > TE[1]:
                        bids.printlog(
                            'WARNING: EchoTime1 > EchoTime2 in: ' + jsonfile,
                            logfile)

    # Search for the IntendedFor images and add them to the json-files. This has been postponed until all modalities have been processed (i.e. so that all target images are indeed on disk)
    if bidsmap['DICOM']['fmap'] is not None:
        for fieldmap in bidsmap['DICOM']['fmap']:
            if 'IntendedFor' in fieldmap and fieldmap['IntendedFor']:
                jsonfile = os.path.join(
                    bidsses, 'fmap',
                    bids.get_bidsname(subid, sesid, 'fmap', fieldmap, '1') +
                    '.json'
                )  # TODO: Assumes that there is only 1 fieldmap acquired for each bidsmap entry / series
                if not os.path.isfile(jsonfile):
                    continue
                intendedfor = fieldmap['IntendedFor']
                if intendedfor.startswith('<<') and intendedfor.endswith('>>'):
                    intendedfor = intendedfor[2:-2].split('><')
                else:
                    intendedfor = [intendedfor]
                with open(jsonfile, 'r') as json_fid:
                    data = json.load(json_fid)
                niifiles = [
                    niifile.split(os.sep + subid + os.sep, 1)[1]
                    for niifile in sorted(
                        glob.glob(
                            os.path.join(
                                bidsses, '**' + os.sep + '*' +
                                '*'.join(intendedfor) + '*.nii*')))
                ]  # Use a relative path
                data['IntendedFor'] = niifiles
                bids.printlog('Adding IntendedFor to: ' + jsonfile, logfile)
                with open(jsonfile, 'w') as json_fid:
                    json.dump(data, json_fid, indent=4)

                # Catch magnitude2 files produced by dcm2niix
                if jsonfile.endswith('magnitude1.json'):
                    jsonfile2 = jsonfile.rsplit('1.json', 1)[0] + '2.json'
                    if os.path.isfile(jsonfile2):
                        with open(jsonfile2, 'r') as json_fid:
                            data = json.load(json_fid)
                        data['IntendedFor'] = niifiles
                        bids.printlog('Adding IntendedFor to: ' + jsonfile2,
                                      logfile)
                        with open(jsonfile2, 'w') as json_fid:
                            json.dump(data, json_fid, indent=4)

    # Collect personal data from the DICOM header (NB: 'series' holds the last processed series folder)
    dicomfile = bids.get_dicomfile(series)
    personals['participant_id'] = subid
    if sesid:
        personals['session_id'] = sesid  # TODO: Check if this can be in the participants.tsv file according to BIDS
    personals['age'] = bids.get_dicomfield('PatientAge', dicomfile)
    personals['sex'] = bids.get_dicomfield('PatientSex', dicomfile)
    personals['size'] = bids.get_dicomfield('PatientSize', dicomfile)
    personals['weight'] = bids.get_dicomfield('PatientWeight', dicomfile)
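
The `<<...>>` wrapping seen above is the bidsmap convention for dynamic values: a field name between the angle brackets is looked up in the DICOM header at run-time, whereas a plain value is taken literally. A minimal sketch of that convention (the `resolve_label` helper and the example header are illustrative, not part of bidscoin):

def resolve_label(label: str, header: dict) -> str:
    """Resolve a bidsmap label: '<<FieldName>>' is read from the header, anything else is literal"""
    if label.startswith('<<') and label.endswith('>>'):
        return str(header.get(label[2:-2], ''))
    return label

header = {'PatientID': '007'}                                   # Illustrative DICOM attributes
assert resolve_label('<<PatientID>>', header) == '007'          # Dynamic: looked up in the header
assert resolve_label('s001', header) == 's001'                  # Static: taken as-is
assert '<<task><rest>>'[2:-2].split('><') == ['task', 'rest']   # IntendedFor: multiple search patterns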
Example No. 10
def rawmapper(rawfolder,
              outfolder: Path = Path(),
              sessions: list = [],
              rename: bool = False,
              dicomfield: tuple = ('PatientComments', ),
              wildcard: str = '*',
              subprefix: str = 'sub-',
              sesprefix: str = 'ses-',
              dryrun: bool = False) -> None:
    """
    :param rawfolder:   The root folder-name of the sub/ses/data/file tree containing the source data files
    :param outfolder:   The name of the folder where the mapping-file is saved (default = rawfolder)
    :param sessions:    Space separated list of selected sub-#/ses-# names / folders to be processed. Otherwise all sessions in the rawfolder will be selected
    :param rename:      Flag for renaming the sub-subid folders to sub-dicomfield
    :param dicomfield:  The names of the dicomfields that are mapped (/ renamed to sub-dcmval/ses-dcmval)
    :param wildcard:    The Unix style pathname pattern expansion that is used by glob to select the series from which the dicomfield is being mapped
    :param subprefix:   The prefix common for all source subject-folders
    :param sesprefix:   The prefix common for all source session-folders
    :param dryrun:      Flag for dry-running renaming the sub-subid folders
    :return:            Nothing
    """

    # Input checking
    rawfolder = Path(rawfolder)
    if not outfolder:
        outfolder = rawfolder
        print(f"Outfolder: {outfolder}")
    outfolder = Path(outfolder)

    # Create or append the output to a mapper logfile
    mapperfile = outfolder / f"rawmapper_{'_'.join(dicomfield)}.tsv"
    if not dryrun:
        if rename:
            with mapperfile.open('a') as fid:
                fid.write('subid\tsesid\tnewsubid\tnewsesid\n')
        else:
            with mapperfile.open('x') as fid:
                fid.write('subid\tsesid\tseriesname\t{}\n'.format(
                    '\t'.join(dicomfield)))

    # Map the sessions in the sourcefolder
    if not sessions:
        sessions = list(rawfolder.glob(f"{subprefix}*/{sesprefix}*"))
        if not sessions:
            sessions = rawfolder.glob(
                f"{subprefix}*")  # Try without session-subfolders
    else:
        sessions = [
            sessionitem for session in sessions
            for sessionitem in rawfolder.rglob(session)
        ]

    # Loop over the selected sessions in the sourcefolder
    for session in sessions:

        # Get the subject and session identifiers from the raw folder
        subid, sesid = bids.get_subid_sesid(session)

        # Parse the new subject and session identifiers from the dicomfield
        series = bids.lsdirs(session, wildcard)
        if not series:
            series = Path('')  # Empty path, so that series.name == '' below
            dcmval = ''
        else:
            series = series[0]  # TODO: loop over series?
            dcmval = ''
            for dcmfield in dicomfield:
                dcmval = dcmval + '/' + str(
                    bids.get_dicomfield(dcmfield, bids.get_dicomfile(series)))
            dcmval = dcmval[1:]

        # Rename the session subfolder in the sourcefolder and print & save this info
        if rename:

            # Get the new subid and sesid
            if not dcmval or dcmval == 'None':
                warnings.warn(
                    f"Skipping renaming because the dicom-field was empty for: {session}"
                )
                continue
            else:
                if '/' in dcmval:  # Allow for different sub/ses delimiters that could be entered at the console (i.e. in PatientComments)
                    delim = '/'
                elif '\\' in dcmval:
                    delim = '\\'
                else:
                    delim = '\r\n'
                newsubsesid = [val for val in dcmval.split(delim)
                               if val]  # Skip empty lines / entries
                newsubid = subprefix + bids.cleanup_value(
                    re.sub(f'^{subprefix}', '', newsubsesid[0]))
                if newsubid == subprefix or newsubid == subprefix + 'None':
                    newsubid = subid
                    warnings.warn(
                        'Could not rename {} because the dicom-field was empty for: {}'
                        .format(subid, session))
                if len(newsubsesid) == 1:
                    newsesid = sesid
                elif len(newsubsesid) == 2:
                    newsesid = sesprefix + bids.cleanup_value(
                        re.sub(f'^{sesprefix}', '', newsubsesid[1]))
                else:
                    warnings.warn(
                        f"Skipping renaming of {session} because the dicom-field '{dcmval}' could not be parsed into [subid, sesid]"
                    )
                    continue
                if newsesid == sesprefix or newsesid == sesprefix + 'None':
                    newsesid = sesid
                    warnings.warn(
                        f"Could not rename {sesid} because the dicom-field was empty for: {session}"
                    )

            # Save the dicomfield / sub-ses mapping to disk and rename the session subfolder (but skip if it already exists)
            newsession = rawfolder / newsubid / newsesid
            print(f"{session} -> {newsession}")
            if newsession == session:
                continue
            if newsession.is_dir():
                warnings.warn(
                    f"{newsession} already exists, skipping renaming of {session}"
                )
            elif not dryrun:
                with mapperfile.open('a') as fid:
                    fid.write(f"{subid}\t{sesid}\t{newsubid}\t{newsesid}\n")
                session.rename(newsession)

        # Print & save the dicom values
        else:
            print('{}/{}/{}\t-> {}'.format(subid, sesid, series.name,
                                           '\t'.join(dcmval.split('/'))))
            if not dryrun:
                with mapperfile.open('a') as fid:
                    fid.write('{}\t{}\t{}\t{}\n'.format(
                        subid, sesid, series.name,
                        '\t'.join(dcmval.split('/'))))
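
The rename branch above accepts '/' and '\' as sub/ses delimiters in the mapped DICOM value (e.g. as typed into PatientComments at the console), falling back to a newline split. A standalone sketch of that parsing step:

def parse_subses(dcmval: str) -> list:
    """Split a mapped DICOM value into [subid] or [subid, sesid] parts (sketch of the logic above)"""
    if '/' in dcmval:
        delim = '/'
    elif '\\' in dcmval:
        delim = '\\'
    else:
        delim = '\r\n'
    return [val for val in dcmval.split(delim) if val]    # Skip empty lines / entries

assert parse_subses('s001/ses01')  == ['s001', 'ses01']
assert parse_subses('s001\\ses01') == ['s001', 'ses01']
assert parse_subses('s001')        == ['s001']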
Example No. 11
def bidsmapper(rawfolder: str,
               bidsfolder: str,
               bidsmapfile: str,
               templatefile: str,
               subprefix: str = 'sub-',
               sesprefix: str = 'ses-',
               interactive: bool = True) -> None:
    """
    Main function that processes all the subjects and sessions in the sourcefolder
    and generates a maximally filled-in bidsmap.yaml file in bidsfolder/code/bidscoin.
    Folders in sourcefolder are assumed to contain a single dataset.

    :param rawfolder:       The root folder-name of the sub/ses/data/file tree containing the source data files
    :param bidsfolder:      The name of the BIDS root folder
    :param bidsmapfile:     The name of the bidsmap YAML-file
    :param templatefile:    The name of the bidsmap template YAML-file
    :param subprefix:       The prefix common for all source subject-folders
    :param sesprefix:       The prefix common for all source session-folders
    :param interactive:     If True, the user will be asked for help if an unknown run is encountered
    :return:            Nothing
    """

    # Input checking
    rawfolder = Path(rawfolder)
    bidsfolder = Path(bidsfolder)
    bidsmapfile = Path(bidsmapfile)
    templatefile = Path(templatefile)

    # Start logging
    bids.setup_logging(bidsfolder / 'code' / 'bidscoin' / 'bidsmapper.log')
    LOGGER.info('')
    LOGGER.info('-------------- START BIDSmapper ------------')
    LOGGER.info(
        f">>> bidsmapper sourcefolder={rawfolder} bidsfolder={bidsfolder} bidsmap={bidsmapfile} "
        f" template={templatefile} subprefix={subprefix} sesprefix={sesprefix} interactive={interactive}"
    )

    # Get the heuristics for filling the new bidsmap
    bidsmap_old, _ = bids.load_bidsmap(bidsmapfile,
                                       bidsfolder / 'code' / 'bidscoin')
    template, _ = bids.load_bidsmap(templatefile,
                                    bidsfolder / 'code' / 'bidscoin')

    # Create the new bidsmap as a copy / bidsmap skeleton with no modality entries (i.e. bidsmap with empty lists)
    if bidsmap_old:
        bidsmap_new = copy.deepcopy(bidsmap_old)
    else:
        bidsmap_new = copy.deepcopy(template)
    for logic in ('DICOM', 'PAR', 'P7', 'Nifti', 'FileSystem'):
        for modality in bids.bidsmodalities + (bids.unknownmodality,
                                               bids.ignoremodality):
            if bidsmap_new[logic] and modality in bidsmap_new[logic]:
                bidsmap_new[logic][modality] = None

    # Start with an empty skeleton if we didn't have an old bidsmap
    if not bidsmap_old:
        bidsmap_old = copy.deepcopy(bidsmap_new)

    # Start the Qt-application
    gui = interactive
    if gui:
        app = QApplication(sys.argv)
        app.setApplicationName('BIDS editor')
        mainwin = bidseditor.MainWindow()
        gui = bidseditor.Ui_MainWindow()
        gui.interactive = interactive
        gui.subprefix = subprefix
        gui.sesprefix = sesprefix

        if gui.interactive == 2:
            QMessageBox.information(
                mainwin, 'BIDS mapping workflow',
                f"The bidsmapper will now scan {bidsfolder} and whenever "
                f"it detects a new type of scan it will ask you to identify it.\n\n"
                f"It is important that you choose the correct BIDS modality "
                f"(e.g. 'anat', 'dwi' or 'func') and suffix (e.g. 'bold' or 'sbref').\n\n"
                f"At the end you will be shown an overview of all the "
                f"different scan types and BIDScoin options (as in the "
                f"bidseditor) that you can then (re)edit to your needs")

    # Loop over all subjects and sessions and build up the bidsmap entries
    subjects = bids.lsdirs(rawfolder, subprefix + '*')
    if not subjects:
        LOGGER.warning(f'No subjects found in: {rawfolder/subprefix}*')
        gui = None
    for n, subject in enumerate(subjects, 1):

        sessions = bids.lsdirs(subject, sesprefix + '*')
        if not sessions:
            sessions = [subject]
        for session in sessions:

            LOGGER.info(f"Parsing: {session} (subject {n}/{len(subjects)})")

            for runfolder in bids.lsdirs(session):

                # Update / append the dicom mapping
                if bidsmap_old['DICOM']:
                    bidsmap_new = build_dicommap(runfolder, bidsmap_new,
                                                 bidsmap_old, template, gui)

                # Update / append the PAR/REC mapping
                if bidsmap_old['PAR']:
                    bidsmap_new = build_parmap(runfolder, bidsmap_new,
                                               bidsmap_old)

                # Update / append the P7 mapping
                if bidsmap_old['P7']:
                    bidsmap_new = build_p7map(runfolder, bidsmap_new,
                                              bidsmap_old)

                # Update / append the nifti mapping
                if bidsmap_old['Nifti']:
                    bidsmap_new = build_niftimap(runfolder, bidsmap_new,
                                                 bidsmap_old)

                # Update / append the file-system mapping
                if bidsmap_old['FileSystem']:
                    bidsmap_new = build_filesystemmap(runfolder, bidsmap_new,
                                                      bidsmap_old)

                # Update / append the plugin mapping
                if bidsmap_old['PlugIns']:
                    bidsmap_new = build_pluginmap(runfolder, bidsmap_new,
                                                  bidsmap_old)

    # Create the bidsmap YAML-file in bidsfolder/code/bidscoin
    bidsmapfile = bidsfolder / 'code' / 'bidscoin' / 'bidsmap.yaml'
    bidsmapfile.parent.mkdir(parents=True, exist_ok=True)

    # Save the bidsmap to the bidsmap YAML-file
    bids.save_bidsmap(bidsmapfile, bidsmap_new)

    # (Re)launch the bidseditor UI_MainWindow
    if gui:
        QMessageBox.information(
            mainwin, 'BIDS mapping workflow',
            f"The bidsmapper has finished scanning {rawfolder}\n\n"
            f"Please carefully check all the different BIDS output names "
            f"and BIDScoin options and (re)edit them to your needs.\n\n"
            f"You can always redo this step later by re-running the "
            f"bidsmapper or by just running the bidseditor tool")

        LOGGER.info('Opening the bidseditor')
        gui.setupUi(mainwin,
                    bidsfolder,
                    rawfolder,
                    bidsmapfile,
                    bidsmap_new,
                    copy.deepcopy(bidsmap_new),
                    template,
                    subprefix=subprefix,
                    sesprefix=sesprefix)
        mainwin.show()
        app.exec()

    LOGGER.info('-------------- FINISHED! -------------------')
    LOGGER.info('')

    bids.reporterrors()
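
A typical invocation might look as follows (the folder and file names are illustrative; interactive=False suppresses the Qt editor):

bidsmapper(rawfolder='raw',
           bidsfolder='bids',
           bidsmapfile='bidsmap.yaml',
           templatefile='bidsmap_template.yaml',
           interactive=False)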
Example No. 12
def bidsparticipants(rawfolder: str,
                     bidsfolder: str,
                     keys: str,
                     subprefix: str = 'sub-',
                     sesprefix: str = 'ses-',
                     dryrun: bool = False) -> None:
    """
    Main function that processes all the subjects and sessions in the sourcefolder to (re)generate the participants.tsv file in the BIDS folder.

    :param rawfolder:       The root folder-name of the sub/ses/data/file tree containing the source data files
    :param bidsfolder:      The name of the BIDS root folder
    :param keys:            The keys that are extracted from the source data when populating the participants.tsv file
    :param subprefix:       The prefix common for all source subject-folders
    :param sesprefix:       The prefix common for all source session-folders
    :param dryrun:          Boolean to just display the participants info
    :return:                Nothing
    """

    # Input checking & defaults
    rawfolder = Path(rawfolder).resolve()
    bidsfolder = Path(bidsfolder).resolve()

    # Start logging
    if dryrun:
        bids.setup_logging()
    else:
        bids.setup_logging(bidsfolder / 'code' / 'bidscoin' /
                           'bidsparticipants.log')
    LOGGER.info('')
    LOGGER.info(
        f"-------------- START bidsparticipants {bids.version()} ------------")
    LOGGER.info(
        f">>> bidsparticipants sourcefolder={rawfolder} bidsfolder={bidsfolder} subprefix={subprefix} sesprefix={sesprefix}"
    )

    # Get the table & dictionary of the subjects that have been processed
    participants_tsv = bidsfolder / 'participants.tsv'
    participants_json = participants_tsv.with_suffix('.json')
    if participants_tsv.is_file():
        participants_table = pd.read_csv(participants_tsv, sep='\t')
        participants_table.set_index(['participant_id'],
                                     verify_integrity=True,
                                     inplace=True)
    else:
        participants_table = pd.DataFrame()
        participants_table.index.name = 'participant_id'
    if participants_json.is_file():
        with participants_json.open('r') as json_fid:
            participants_dict = json.load(json_fid)
    else:
        participants_dict = {
            'participant_id': {
                'Description': 'Unique participant identifier'
            }
        }

    # Get the list of subjects
    subjects = bids.lsdirs(bidsfolder, 'sub-*')
    if not subjects:
        LOGGER.warning(f"No subjects found in: {bidsfolder}")

    # Remove obsolete participants from the participants table
    for participant in participants_table.index:
        if participant not in subjects:
            participants_table = participants_table.drop(participant)

    # Loop over all subjects in the bids-folder and add them to the participants table
    for n, subject in enumerate(subjects, 1):

        LOGGER.info(
            f"------------------- Subject {n}/{len(subjects)} -------------------"
        )
        personals = dict()
        subid, sesid = bids.get_subid_sesid(subject / 'dum.my')
        subject = rawfolder / subid.replace(
            'sub-', subprefix
        )  # TODO: This assumes that the subject-ids in the rawfolder did not contain BIDS-invalid characters (such as '_')
        sessions = bids.lsdirs(subject, sesprefix + '*')
        if not subject.is_dir():
            LOGGER.error(f"Could not find source-folder: {subject}")
            continue
        if not sessions:
            sessions = [subject]
        for session in sessions:

            # Unpack the data in a temporary folder if it is tarballed/zipped and/or contains a DICOMDIR file
            session, unpacked = bids.unpack(session, subprefix, sesprefix, '*')

            LOGGER.info(f"Scanning session: {session}")

            # Update / append the source data mapping
            success = scanparticipant('DICOM', session, personals, subid,
                                      sesid)

            # Clean-up the temporary unpacked data
            if unpacked:
                shutil.rmtree(session)

            if success:
                break

        # Store the collected personals in the participant_table
        for key in personals:

            # participant_id is the index of the participants_table
            assert 'participant_id' in personals
            if key == 'participant_id' or key not in keys:
                continue

            # TODO: Check that only values that are consistent over sessions go in the participants.tsv file, otherwise put them in a sessions.tsv file

            if key not in participants_dict:
                participants_dict[key] = dict(
                    LongName='Long (unabbreviated) name of the column',
                    Description='Description of the column',
                    Levels=dict(Key='Value (This is for categorical variables: a dictionary of possible values (keys) and their descriptions (values))'),
                    Units='Measurement units. [<prefix symbol>]<unit symbol> format following the SI standard is RECOMMENDED',
                    TermURL='URL pointing to a formal definition of this type of data in an ontology available on the web')
            participants_table.loc[personals['participant_id'],
                                   key] = personals[key]

    # Write the collected data to the participant files
    LOGGER.info(f"Writing subject data to: {participants_tsv}")
    if not dryrun:
        participants_table.replace('', 'n/a').to_csv(participants_tsv,
                                                     sep='\t',
                                                     encoding='utf-8',
                                                     na_rep='n/a')

    LOGGER.info(f"Writing subject data dictionary to: {participants_json}")
    if not dryrun:
        with participants_json.open('w') as json_fid:
            json.dump(participants_dict, json_fid, indent=4)

    print(participants_table)

    LOGGER.info('-------------- FINISHED! ------------')
    LOGGER.info('')

    bids.reporterrors()
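
The key-loop above effectively upserts one row per subject into the pandas table, using participant_id as the index. A minimal sketch of that pattern, with illustrative data:

import pandas as pd

participants_table = pd.DataFrame()
participants_table.index.name = 'participant_id'

for personals in ({'participant_id': 'sub-001', 'age': '25', 'sex': 'F'},
                  {'participant_id': 'sub-002', 'age': '31', 'sex': 'M'}):
    for key, value in personals.items():
        if key != 'participant_id':
            participants_table.loc[personals['participant_id'], key] = value

print(participants_table)    # One row per participant, indexed by participant_id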
Example No. 13
def scanparticipant(dataformat: str, session: Path, personals: dict,
                    subid: str, sesid: str) -> bool:
    """
    Extracts personal data (e.g. Age, Sex) from the header of the (first) source file in the session
    and stores it in the personals dictionary

    :param dataformat:  The format of the source data (e.g. 'DICOM' or 'PAR')
    :param session:     The full-path name of the subject/session source file/folder
    :param personals:   The dictionary with the personal information
    :param subid:       The subject-id from the bids-folder
    :param sesid:       The session-id from the bids-folder
    :return:            True if successful
    """

    # Get valid BIDS subject/session identifiers from the (first) DICOM- or PAR/XML source file
    sourcefile = Path()
    if dataformat == 'DICOM':
        sources = bids.lsdirs(session)
        for source in sources:
            sourcefile = bids.get_dicomfile(source)
            if sourcefile.name:
                break

    elif dataformat == 'PAR':
        sources = bids.get_parfiles(session)
        if sources:
            sourcefile = sources[0]

    else:
        LOGGER.error(
            f"Unsupported data format: {dataformat}\nPlease report this bug")
        return False

    if not sources:
        LOGGER.info(f"No data found for: {session}")
        return False

    # Collect personal data from a source header (PAR/XML does not contain personal info)
    if dataformat == 'DICOM' and sourcefile.name:
        personals['participant_id'] = subid
        if sesid:
            if 'session_id' not in personals:
                personals['session_id'] = sesid
            else:
                return False  # Only from the first session -> BIDS specification
        age = bids.get_dicomfield(
            'PatientAge', sourcefile
        ) or ''  # A string with one of the following formats: nnnD, nnnW, nnnM, nnnY ('' guards against a missing tag)
        if age.endswith('D'):
            personals['age'] = str(int(float(age.rstrip('D')) / 365.2425))  # NB: ~365.2425 days per (Gregorian) year
        elif age.endswith('W'):
            personals['age'] = str(int(float(age.rstrip('W')) / 52.1775))
        elif age.endswith('M'):
            personals['age'] = str(int(float(age.rstrip('M')) / 12))
        elif age.endswith('Y'):
            personals['age'] = str(int(float(age.rstrip('Y'))))
        elif age:
            personals['age'] = age
        personals['sex'] = bids.get_dicomfield('PatientSex', sourcefile)
        personals['size'] = bids.get_dicomfield('PatientSize', sourcefile)
        personals['weight'] = bids.get_dicomfield('PatientWeight', sourcefile)

        return True
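
The DICOM PatientAge tag encodes age as a fixed-width string such as '025Y' or '012M'. A standalone sketch of the year-conversion used above (~365.2425 days and ~52.1775 weeks per year):

def age_in_years(age: str) -> str:
    """Convert a DICOM PatientAge string (nnnD/nnnW/nnnM/nnnY) to whole years (sketch)"""
    if age.endswith('D'):
        return str(int(float(age.rstrip('D')) / 365.2425))
    if age.endswith('W'):
        return str(int(float(age.rstrip('W')) / 52.1775))
    if age.endswith('M'):
        return str(int(float(age.rstrip('M')) / 12))
    if age.endswith('Y'):
        return str(int(float(age.rstrip('Y'))))
    return age

assert age_in_years('025Y') == '25'
assert age_in_years('018M') == '1'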
Example No. 14
def coin_data2bids(dataformat: str, session: Path, bidsmap: dict,
                   bidsfolder: Path, personals: dict, subprefix: str,
                   sesprefix: str) -> None:
    """
    Converts the session source-files into BIDS-valid nifti-files in the corresponding bidsfolder and
    extracts personals (e.g. Age, Sex) from the source header

    :param dataformat:  The format of the raw input data that is to be coined (e.g. 'DICOM' or 'PAR', see bids.get_dataformat)
    :param session:     The full-path name of the subject/session source file/folder
    :param bidsmap:     The full mapping heuristics from the bidsmap YAML-file
    :param bidsfolder:  The full-path name of the BIDS root-folder
    :param personals:   The dictionary with the personal information
    :param subprefix:   The prefix common for all source subject-folders
    :param sesprefix:   The prefix common for all source session-folders
    :return:            Nothing
    """

    # Get valid BIDS subject/session identifiers from the (first) DICOM- or PAR/XML source file
    if dataformat == 'DICOM':
        sourcefile = Path()
        sources = bids.lsdirs(session)
        for source in sources:
            sourcefile = bids.get_dicomfile(source)
            manufacturer = bids.get_dicomfield('Manufacturer', sourcefile)
            if sourcefile.name:
                break

    elif dataformat == 'PAR':
        sources = bids.get_parfiles(session)
        manufacturer = 'Philips Medical Systems'
        if sources:
            sourcefile = sources[0]

    else:
        LOGGER.error(
            f"Unsupported data format: {dataformat}\nPlease report this bug")
        return

    if not sources:
        LOGGER.info(f"No data found for: {session}")
        return

    subid, sesid = bids.get_subid_sesid(sourcefile,
                                        bidsmap[dataformat]['subject'],
                                        bidsmap[dataformat]['session'],
                                        subprefix, sesprefix)

    if subid == subprefix:
        LOGGER.error(f"No valid subject identifier found for: {session}")
        return

    # Create the BIDS session-folder and a scans.tsv file
    bidsses = bidsfolder / subid / sesid
    if bidsses.is_dir():
        LOGGER.warning(
            f"Existing BIDS output-directory found, which may result in duplicate data (with increased run-index). Make sure {bidsses} was cleaned-up from old data before (re)running the bidscoiner"
        )
    bidsses.mkdir(parents=True, exist_ok=True)
    scans_tsv = bidsses / f"{subid}{bids.add_prefix('_',sesid)}_scans.tsv"
    if scans_tsv.is_file():
        scans_table = pd.read_csv(scans_tsv, sep='\t', index_col='filename')
    else:
        scans_table = pd.DataFrame(columns=['acq_time'], dtype='str')
        scans_table.index.name = 'filename'

    # Process all the source files or run subfolders
    for source in sources:

        # Get a source-file
        if dataformat == 'DICOM':
            sourcefile = bids.get_dicomfile(source)
        elif dataformat == 'PAR':
            sourcefile = source
        if not sourcefile.name:
            continue

        # Get a matching run from the bidsmap
        run, datatype, index = bids.get_matching_run(sourcefile, bidsmap,
                                                     dataformat)

        # Check if we should ignore this run
        if datatype == bids.ignoredatatype:
            LOGGER.info(f"Leaving out: {source}")
            continue

        # Check if we already know this run
        if index is None:
            LOGGER.error(
                f"Skipping unknown '{datatype}' run: {sourcefile}\n-> Re-run the bidsmapper and delete {bidsses} to solve this warning"
            )
            continue

        LOGGER.info(f"Processing: {source}")

        # Create the BIDS session/datatype output folder
        if run['bids']['suffix'] in bids.get_derivatives(datatype):
            outfolder = bidsfolder / 'derivatives' / manufacturer.replace(
                ' ', '') / subid / sesid / datatype
        else:
            outfolder = bidsses / datatype
        outfolder.mkdir(parents=True, exist_ok=True)

        # Compose the BIDS filename using the matched run
        bidsname = bids.get_bidsname(subid, sesid, run)
        runindex = run['bids'].get('run', '')
        if runindex.startswith('<<') and runindex.endswith('>>'):
            bidsname = bids.increment_runindex(outfolder, bidsname)
        jsonfiles = [
            (outfolder / bidsname).with_suffix('.json')
        ]  # List -> Collect the associated json-files (for updating them later) -- possibly > 1

        # Check if file already exists (-> e.g. when a static runindex is used)
        if (outfolder / bidsname).with_suffix('.json').is_file():
            LOGGER.warning(
                f"{outfolder/bidsname}.* already exists and will be deleted -- check your results carefully!"
            )
            for ext in ('.nii.gz', '.nii', '.json', '.bval', '.bvec',
                        '.tsv.gz'):
                (outfolder / bidsname).with_suffix(ext).unlink(missing_ok=True)

        # Convert physiological log files (dcm2niix can't handle these)
        if run['bids']['suffix'] == 'physio':
            if bids.get_dicomfile(source, 2).name:
                LOGGER.warning(
                    f"Found > 1 DICOM file in {source}, using: {sourcefile}")
            physiodata = physio.readphysio(sourcefile)
            physio.physio2tsv(physiodata, outfolder / bidsname)

        # Convert the source-files in the run folder to nifti's in the BIDS-folder
        else:
            command = '{path}dcm2niix {args} -f "{filename}" -o "{outfolder}" "{source}"'.format(
                path=bidsmap['Options']['dcm2niix']['path'],
                args=bidsmap['Options']['dcm2niix']['args'],
                filename=bidsname,
                outfolder=outfolder,
                source=source)
            if not bids.run_command(command):
                continue

            # Replace uncropped output image with the cropped one
            if '-x y' in bidsmap['Options']['dcm2niix']['args']:
                for dcm2niixfile in sorted(
                        outfolder.glob(bidsname +
                                       '*_Crop_*')):  # e.g. *_Crop_1.nii.gz
                    ext = ''.join(dcm2niixfile.suffixes)
                    newbidsfile = str(dcm2niixfile).rsplit(ext, 1)[0].rsplit(
                        '_Crop_', 1)[0] + ext
                    LOGGER.info(
                        f"Found dcm2niix _Crop_ postfix, replacing original file\n{dcm2niixfile} ->\n{newbidsfile}"
                    )
                    dcm2niixfile.replace(newbidsfile)

            # Rename all files that got additional postfixes from dcm2niix. See: https://github.com/rordenlab/dcm2niix/blob/master/FILENAMING.md
            dcm2niixpostfixes = ('_c', '_i', '_Eq', '_real', '_imaginary',
                                 '_MoCo', '_t', '_Tilt', '_e', '_ph')
            dcm2niixfiles = sorted(
                set([
                    dcm2niixfile for dcm2niixpostfix in dcm2niixpostfixes
                    for dcm2niixfile in outfolder.glob(
                        f"{bidsname}*{dcm2niixpostfix}*")
                ]))
            for dcm2niixfile in dcm2niixfiles:
                ext = ''.join(dcm2niixfile.suffixes)
                postfixes = str(dcm2niixfile).split(bidsname)[1].rsplit(
                    ext)[0].split('_')[1:]
                newbidsname = dcm2niixfile.name  # Strip the additional postfixes and assign them to bids entities in the for-loop below
                for postfix in postfixes:  # dcm2niix postfixes _c%d, _e%d and _ph (and any combination of these in that order) are for multi-coil data, multi-echo data and phase data

                    # Patch the echo entity in the newbidsname with the dcm2niix echo info                      # NB: We can't rely on the bids-entity info here because manufacturers can e.g. put multiple echos in one series / run-folder
                    if postfix[0] == 'e' and bids.get_bidsvalue(
                            newbidsname, 'echo'
                    ):  # NB: Check if postfix[0]=='e' uniquely refers to the right dcm2niixpostfix
                        echonr = f"_{postfix}"  # E.g. echonr='_e1' or echonr='_pha'
                        for dcm2niixpostfix in dcm2niixpostfixes:
                            echonr = echonr.replace(
                                dcm2niixpostfix, ''
                            )  # Strip the dcm2niixpostfix to keep the echonr info. E.g. [echonr='_e1' or echonr='_pha'] -> [echonr='1' or echonr='a']
                        if echonr.isalpha():
                            echonr = ord(
                                echonr
                            ) - 95  # dcm2niix adds an alphabetically ordered character if it outputs more than one image with the same name. Convert character to echo-number: '' -> 1, 'a'->2, etc
                        elif not echonr:
                            echonr = 1
                        newbidsname = bids.get_bidsvalue(
                            newbidsname, 'echo', str(echonr)
                        )  # In contrast to other labels, run and echo labels MUST be integers. Those labels MAY include zero padding, but this is NOT RECOMMENDED to maintain their uniqueness

                    # Patch fieldmap images (NB: datatype=='fmap' is too broad, see the fmap.yaml file)
                    elif run['bids']['suffix'] in ('magnitude', 'magnitude1',
                                                   'magnitude2', 'phase1',
                                                   'phase2', 'phasediff',
                                                   'fieldmap'):
                        if len(dcm2niixfiles) not in (
                                0, 2, 4, 6, 8
                        ):  # Phase / echo data may be stored in the same data source / run folder
                            LOGGER.warning(
                                f"Unknown fieldmap {outfolder/bidsname} for '{postfix}'"
                            )
                        newbidsname = newbidsname.replace(
                            '_fieldmap_ph', '_fieldmap')
                        newbidsname = newbidsname.replace(
                            '_magnitude_e1', '_magnitude')
                        newbidsname = newbidsname.replace(
                            '_magnitude_ph', '_fieldmap')
                        newbidsname = newbidsname.replace(
                            '_magnitude1_e1', '_magnitude1')
                        newbidsname = newbidsname.replace(
                            '_magnitude2_e1', '_magnitude1'
                        )  # This can happen when the e2 image is stored in the same directory as the e1 image, but with the e2 listed first
                        newbidsname = newbidsname.replace(
                            '_magnitude1_e2', '_magnitude2')
                        if len(dcm2niixfiles) == 8:
                            newbidsname = newbidsname.replace(
                                '_magnitude1_ph', '_phase1'
                            )  # Two magnitude + 2 phase images in one folder / datasource
                        else:
                            newbidsname = newbidsname.replace(
                                '_magnitude1_ph', '_phasediff'
                            )  # One or two magnitude + 1 phasediff image
                        newbidsname = newbidsname.replace(
                            '_magnitude1a', '_magnitude2')
                        newbidsname = newbidsname.replace(
                            '_magnitude1_pha', '_phase2')
                        newbidsname = newbidsname.replace(
                            '_magnitude2_e2', '_magnitude2')
                        newbidsname = newbidsname.replace(
                            '_magnitude2_ph', '_phase2')
                        newbidsname = newbidsname.replace(
                            '_phase1_e1', '_phase1')
                        newbidsname = newbidsname.replace(
                            '_phase2_e1', '_phase1'
                        )  # This can happen when the e2 image is stored in the same directory as the e1 image, but with the e2 listed first
                        newbidsname = newbidsname.replace(
                            '_phase1_ph', '_phase1')
                        newbidsname = newbidsname.replace(
                            '_phase1_e2', '_phase2')
                        newbidsname = newbidsname.replace(
                            '_phase2_e2', '_phase2')
                        newbidsname = newbidsname.replace(
                            '_phase2_ph', '_phase2')

                    # Append the dcm2niix info to acq-label, may need to be improved / elaborated for future BIDS standards, supporting multi-coil data
                    else:
                        newbidsname = bids.get_bidsvalue(
                            newbidsname, 'dummy', postfix)

                    # Remove the added postfix from the new bidsname
                    newbidsname = newbidsname.replace(f"_{postfix}_",
                                                      '_')  # If it is not last
                    newbidsname = newbidsname.replace(f"_{postfix}.",
                                                      '.')  # If it is last

                # Save the file with a new name
                if runindex.startswith('<<') and runindex.endswith('>>'):
                    newbidsname = bids.increment_runindex(
                        outfolder, newbidsname, ''
                    )  # Update the runindex now that the acq-label has changed
                newbidsfile = outfolder / newbidsname
                LOGGER.info(
                    f"Found dcm2niix {postfixes} postfixes, renaming\n{dcm2niixfile} ->\n{newbidsfile}"
                )
                if newbidsfile.is_file():
                    LOGGER.warning(
                        f"Overwriting existing {newbidsfile} file -- check your results carefully!"
                    )
                dcm2niixfile.replace(newbidsfile)
                if ext == '.json':
                    oldjsonfile = (outfolder / bidsname).with_suffix('.json')
                    if oldjsonfile in jsonfiles and not oldjsonfile.is_file():
                        jsonfiles.remove(
                            (outfolder / bidsname).with_suffix('.json'))
                    jsonfiles.append(newbidsfile)

        # Loop over and adapt all the newly produced json files and write to the scans.tsv file (every nifti-file comes with a json-file)
        for jsonfile in sorted(set(jsonfiles)):

            # Check if dcm2niix behaved as expected
            if not jsonfile.is_file():
                LOGGER.error(
                    f"Unexpected file conversion result: {jsonfile} not found")
                continue

            # Add a dummy b0 bval- and bvec-file for any file without a bval/bvec file (e.g. sbref, b0 scans)
            if datatype == 'dwi':
                bvecfile = jsonfile.with_suffix('.bvec')
                bvalfile = jsonfile.with_suffix('.bval')
                if not bvecfile.is_file():
                    LOGGER.info(f"Adding dummy bvec file: {bvecfile}")
                    with bvecfile.open('w') as bvec_fid:
                        bvec_fid.write('0\n0\n0\n')
                if not bvalfile.is_file():
                    LOGGER.info(f"Adding dummy bval file: {bvalfile}")
                    with bvalfile.open('w') as bval_fid:
                        bval_fid.write('0\n')

            # Add the TaskName to the func json-file
            elif datatype == 'func':
                with jsonfile.open('r') as json_fid:
                    data = json.load(json_fid)
                if 'TaskName' not in data:
                    LOGGER.info(f"Adding TaskName to: {jsonfile}")
                    data['TaskName'] = run['bids']['task']
                    with jsonfile.open('w') as json_fid:
                        json.dump(data, json_fid, indent=4)

            # Parse the acquisition time from the json file or else from the source header (NB: assuming the source file represents the first acquisition)
            niifile = list(
                jsonfile.parent.glob(jsonfile.stem + '.nii*')
            )  # Find the corresponding nifti file (there should be only one, let's not make assumptions about the .gz extension)
            if (niifile and datatype not in bidsmap['Options']['bidscoin']['bidsignore']
                    and run['bids']['suffix'] not in bids.get_derivatives(datatype)):
                with jsonfile.open('r') as json_fid:
                    data = json.load(json_fid)
                if 'AcquisitionTime' not in data or not data['AcquisitionTime']:
                    data['AcquisitionTime'] = bids.get_sourcefield(
                        'AcquisitionTime', sourcefile)  # DICOM
                if not data['AcquisitionTime']:
                    data['AcquisitionTime'] = bids.get_sourcefield(
                        'exam_date', sourcefile)  # PAR/XML
                try:
                    acq_time = dateutil.parser.parse(data['AcquisitionTime'])
                except Exception:
                    LOGGER.warning(
                        f"Could not parse the acquisition time from: '{data['AcquisitionTime']}' in {sourcefile}"
                    )
                    acq_time = dateutil.parser.parse('00:00:00')
                scanpath = niifile[0].relative_to(bidsses)
                scans_table.loc[
                    scanpath.as_posix(),
                    'acq_time'] = '1925-01-01T' + acq_time.strftime('%H:%M:%S')

    # Write the scans_table to disk
    LOGGER.info(f"Writing acquisition time data to: {scans_tsv}")
    scans_table.sort_values(by=['acq_time', 'filename'], inplace=True)
    scans_table.to_csv(scans_tsv, sep='\t', encoding='utf-8')

    # Add IntendedFor and TE1+TE2 meta-data to the fieldmap json-files. This has been postponed until all datatypes have been processed (i.e. so that all target images are indeed on disk)
    if bidsmap[dataformat]['fmap'] is not None:
        for fieldmap in bidsmap[dataformat]['fmap']:
            bidsname = bids.get_bidsname(subid, sesid, fieldmap)
            niifiles = []
            intendedfor = fieldmap['bids']['IntendedFor']

            # Search for the imaging files that match the IntendedFor search criteria
            if intendedfor:
                if intendedfor.startswith('<<') and intendedfor.endswith('>>'):
                    intendedfor = intendedfor[2:-2].split('><')
                elif not isinstance(intendedfor, list):
                    intendedfor = [intendedfor]
                for selector in intendedfor:
                    niifiles.extend(
                        [
                            Path(niifile).relative_to(bidsfolder / subid)
                            for niifile in sorted(
                                bidsses.rglob(f"*{selector}*.nii*"))
                            if selector
                        ]
                    )  # Search in all runs using a relative path to the subject folder
            else:
                intendedfor = []

            # Get the set of json-files (account for multiple runs in one data source and dcm2niix postfixes inserted into the acquisition label)
            jsonfiles = []
            acqlabel = bids.get_bidsvalue(bidsname, 'acq')
            patterns = (bidsname.replace('_run-1_', '_run-[0-9]*_')
                                .replace('_magnitude1', '_magnitude*')
                                .replace('_magnitude2', '_magnitude*')
                                .replace('_phase1', '_phase*')
                                .replace('_phase2', '_phase*'),
                        bidsname.replace('_run-1_', '_run-[0-9]*_')
                                .replace('_magnitude1', '_phase*')
                                .replace('_magnitude2', '_phase*'))
            for pattern in patterns:
                jsonfiles.extend((bidsses / 'fmap').glob(pattern + '.json'))
                if acqlabel:
                    cepattern = bids.get_bidsvalue(pattern, 'acq',
                                                   acqlabel + '[CE][0-9]*')
                    jsonfiles.extend(
                        list((bidsses / 'fmap').glob(cepattern + '.json')))

            # Save the meta-data in the jsonfiles
            for jsonfile in sorted(set(jsonfiles)):

                # Add the IntendedFor data
                with jsonfile.open('r') as json_fid:
                    data = json.load(json_fid)
                if 'IntendedFor' not in data:
                    if niifiles:
                        LOGGER.info(f"Adding IntendedFor to: {jsonfile}")
                    elif intendedfor:
                        LOGGER.warning(
                            f"Empty 'IntendedFor' fieldmap value in {jsonfile}: the search for {intendedfor} gave no results"
                        )
                    else:
                        LOGGER.warning(
                            f"Empty 'IntendedFor' fieldmap value in {jsonfile}: the IntendedFor value of the bidsmap entry was empty"
                        )
                    data['IntendedFor'] = [
                        niifile.as_posix() for niifile in niifiles
                    ]  # The path needs to use forward slashes instead of backward slashes
                    with jsonfile.open('w') as json_fid:
                        json.dump(data, json_fid, indent=4)

                # Extract the echo times from magnitude1 and magnitude2 and add them to the phasediff json-file
                if jsonfile.name.endswith('phasediff.json'):
                    json_magnitude = [None, None]
                    TE = [None, None]
                    for n in (0, 1):
                        json_magnitude[
                            n] = jsonfile.parent / jsonfile.name.replace(
                                '_phasediff', f"_magnitude{n+1}")
                        if not json_magnitude[n].is_file():
                            LOGGER.error(
                                f"Could not find expected magnitude{n+1} image associated with: {jsonfile}"
                            )
                        else:
                            with json_magnitude[n].open('r') as json_fid:
                                data = json.load(json_fid)
                            TE[n] = data['EchoTime']
                    if None in TE:
                        LOGGER.error(
                            f"Cannot find and add valid EchoTime1={TE[0]} and EchoTime2={TE[1]} data to: {jsonfile}"
                        )
                    elif TE[0] > TE[1]:
                        LOGGER.error(
                            f"Found invalid EchoTime1={TE[0]} > EchoTime2={TE[1]} for: {jsonfile}"
                        )
                    else:
                        with jsonfile.open('r') as json_fid:
                            data = json.load(json_fid)
                        data['EchoTime1'] = TE[0]
                        data['EchoTime2'] = TE[1]
                        LOGGER.info(
                            f"Adding EchoTime1: {TE[0]} and EchoTime2: {TE[1]} to {jsonfile}"
                        )
                        with jsonfile.open('w') as json_fid:
                            json.dump(data, json_fid, indent=4)

    # Collect personal data from a source header (PAR/XML does not contain personal info)
    if dataformat == 'DICOM' and sourcefile.name:
        personals['participant_id'] = subid
        if sesid:
            if 'session_id' not in personals:
                personals['session_id'] = sesid
            else:
                return  # Only take data from the first session -> BIDS specification
        age = bids.get_dicomfield(
            'PatientAge', sourcefile
        ) or ''  # A string with one of the following formats: nnnD, nnnW, nnnM, nnnY ('' guards against a missing tag)
        if age.endswith('D'):
            personals['age'] = str(int(float(age.rstrip('D')) / 365.2425))  # NB: ~365.2425 days per (Gregorian) year
        elif age.endswith('W'):
            personals['age'] = str(int(float(age.rstrip('W')) / 52.1775))
        elif age.endswith('M'):
            personals['age'] = str(int(float(age.rstrip('M')) / 12))
        elif age.endswith('Y'):
            personals['age'] = str(int(float(age.rstrip('Y'))))
        elif age:
            personals['age'] = age
        personals['sex'] = bids.get_dicomfield('PatientSex', sourcefile)
        personals['size'] = bids.get_dicomfield('PatientSize', sourcefile)
        personals['weight'] = bids.get_dicomfield('PatientWeight', sourcefile)
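
When dcm2niix writes more than one image with the same name it appends an alphabetically ordered character, which the echo-patching loop above converts to a numeric echo index ('' -> 1, 'a' -> 2, 'b' -> 3). A tiny sketch of that mapping:

def echo_number(postfix: str) -> int:
    # '' -> 1, 'a' -> 2, 'b' -> 3, ... (numeric postfixes such as '1' are kept as-is in the code above)
    return ord(postfix) - 95 if postfix.isalpha() else 1

assert [echo_number(c) for c in ('', 'a', 'b')] == [1, 2, 3]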
Example No. 15
def bidscoiner(rawfolder,
               bidsfolder,
               subjects=(),
               force=False,
               participants=False,
               bidsmapfile='code' + os.sep + 'bidsmap.yaml'):
    """
    Main function that processes all the subjects and sessions in the rawfolder and uses the
    bidsmap.yaml file in bidsfolder/code to cast the data into the BIDS folder.

    :param str rawfolder:     The root folder-name of the sub/ses/data/file tree containing the source data files
    :param str bidsfolder:    The name of the BIDS root folder
    :param list subjects:     List of selected sub-# names / folders to be processed. Otherwise all subjects in the rawfolder will be selected
    :param bool force:        If True, subjects will be processed, regardless of existing folders in the bidsfolder. Otherwise existing folders will be skipped
    :param bool participants: If True, subjects in participants.tsv will not be processed (this could be used e.g. to protect these subjects from being reprocessed), also when force=True
    :param str bidsmapfile:   The name of the bidsmap YAML-file. If the bidsmap pathname is relative (i.e. no "/" in the name) then it is assumed to be located in bidsfolder/code/
    :return:                  Nothing
    :rtype: NoneType
    """

    # Input checking
    rawfolder = os.path.abspath(os.path.expanduser(rawfolder))
    bidsfolder = os.path.abspath(os.path.expanduser(bidsfolder))
    os.makedirs(os.path.join(bidsfolder, 'code'), exist_ok=True)
    if not os.path.isfile(os.path.join(bidsfolder, '.bidsignore')):
        with open(os.path.join(bidsfolder, '.bidsignore'), 'w') as bidsignore:
            bidsignore.write(bids.unknownmodality + '/')  # .bidsignore entries always use forward slashes

    # Start logging
    global logfile
    logfile = os.path.join(bidsfolder, 'code', 'bidscoiner.log')
    bids.printlog(
        '------------ START BIDScoiner {ver}: BIDS {bidsver} ------------\n>>> bidscoiner rawfolder={arg1} bidsfolder={arg2} subjects={arg3} force={arg4} participants={arg5} bidsmap={arg6}'
        .format(ver=bids.version(),
                bidsver=bids.bidsversion(),
                arg1=rawfolder,
                arg2=bidsfolder,
                arg3=subjects,
                arg4=force,
                arg5=participants,
                arg6=bidsmapfile), logfile)

    # Create a dataset description file if it does not exist
    dataset_file = os.path.join(bidsfolder, 'dataset_description.json')
    if not os.path.isfile(dataset_file):
        dataset_description = {
            "Name": "REQUIRED. Name of the dataset",
            "BIDSVersion": bids.bidsversion(),
            "License": "RECOMMENDED. What license is this dataset distributed under? The use of license name abbreviations is suggested for specifying a license",
            "Authors": ["OPTIONAL. List of individuals who contributed to the creation/curation of the dataset"],
            "Acknowledgements": "OPTIONAL. List of individuals who contributed to the creation/curation of the dataset",
            "HowToAcknowledge": "OPTIONAL. Instructions how researchers using this dataset should acknowledge the original authors. This field can also be used to define a publication that should be cited in publications that use the dataset",
            "Funding": ["OPTIONAL. List of sources of funding (grant numbers)"],
            "ReferencesAndLinks": ["OPTIONAL. List of references to publications that contain information on the dataset, or links"],
            "DatasetDOI": "OPTIONAL. The Digital Object Identifier of the dataset (not the corresponding paper)"
        }
        bids.printlog('Creating dataset description file: ' + dataset_file,
                      logfile)
        with open(dataset_file, 'w') as fid:
            json.dump(dataset_description, fid, indent=4)

    # Create a README file if it does not exist
    readme_file = os.path.join(bidsfolder, 'README')
    if not os.path.isfile(readme_file):
        bids.printlog('Creating README file: ' + readme_file, logfile)
        with open(readme_file, 'w') as fid:
            fid.write(
                'A free form text (README) describing the dataset in more detail SHOULD be provided'
            )

    # Get the bidsmap heuristics from the bidsmap YAML-file
    bidsmap = bids.get_heuristics(bidsmapfile,
                                  os.path.join(bidsfolder, 'code'),
                                  logfile=logfile)

    # Get the table with subjects that have been processed
    participants_file = os.path.join(bidsfolder, 'participants.tsv')
    if os.path.exists(participants_file):
        participants_table = pd.read_table(participants_file)
    else:
        participants_table = pd.DataFrame(columns=['participant_id'])

    # Get the list of subjects
    if not subjects:
        subjects = bids.lsdirs(rawfolder, 'sub-*')
    else:
        subjects = [
            os.path.join(rawfolder, subject) for subject in subjects
            if os.path.isdir(os.path.join(rawfolder, subject))
        ]

    # Loop over all subjects and sessions and convert them using the bidsmap entries
    for subject in subjects:

        if participants and os.path.basename(subject) in list(participants_table.participant_id):
            continue

        personals = dict()
        sessions = bids.lsdirs(subject, 'ses-*')
        if not sessions:
            sessions = [subject]
        for session in sessions:

            # Check if we should skip the session-folder
            if not force and os.path.isdir(
                    session.replace(rawfolder, bidsfolder)):
                continue

            # Update / append the dicom mapping
            if bidsmap['DICOM']:
                coin_dicom(session, bidsmap, bidsfolder, personals)

            # Update / append the PAR/REC mapping
            if bidsmap['PAR']:
                coin_par(session, bidsmap, bidsfolder, personals)

            # Update / append the P7 mapping
            if bidsmap['P7']:
                coin_p7(session, bidsmap, bidsfolder, personals)

            # Update / append the nifti mapping
            if bidsmap['Nifti']:
                coin_nifti(session, bidsmap, bidsfolder, personals)

            # Update / append the file-system mapping
            if bidsmap['FileSystem']:
                coin_filesystem(session, bidsmap, bidsfolder, personals)

            # Update / append the plugin mapping
            if bidsmap['PlugIn']:
                coin_plugin(session, bidsmap, bidsfolder, personals)

        # Write the collected personals to the participants_file
        if personals:
            for key in personals:
                if key not in participants_table.columns:
                    participants_table[key] = None
            participants_table = participants_table.append(
                personals, ignore_index=True, verify_integrity=True)
            bids.printlog('Writing subject data to: ' + participants_file,
                          logfile)
            participants_table.to_csv(participants_file,
                                      sep='\t',
                                      encoding='utf-8',
                                      index=False)

    bids.printlog('------------ FINISHED! ------------', logfile)
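
For reference, a hedged usage sketch of the bidscoiner() above; the folder names are illustrative only and the bidsmap is assumed to live in its default location (bidsfolder/code/bidsmap.yaml):

bidscoiner(rawfolder='/project/raw',
           bidsfolder='/project/bids',
           subjects=('sub-001', 'sub-002'),  # An empty tuple would select all subjects in rawfolder
           force=False,                      # Skip sessions that already exist in the bidsfolder
           participants=True)                # Also skip subjects already listed in participants.tsv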
Example No. 16
def bidscoiner(rawfolder: str,
               bidsfolder: str,
               subjects: list = (),
               force: bool = False,
               participants: bool = False,
               bidsmapfile: str = 'bidsmap.yaml',
               subprefix: str = 'sub-',
               sesprefix: str = 'ses-') -> None:
    """
    Main function that processes all the subjects and sessions in the sourcefolder and uses the
    bidsmap.yaml file in bidsfolder/code/bidscoin to cast the data into the BIDS folder.

    :param rawfolder:       The root folder-name of the sub/ses/data/file tree containing the source data files
    :param bidsfolder:      The name of the BIDS root folder
    :param subjects:        List of selected subjects / participants (i.e. sub-# names / folders) to be processed (the sub- prefix can be removed). Otherwise all subjects in the sourcefolder will be selected
    :param force:           If True, subjects will be processed, regardless of existing folders in the bidsfolder. Otherwise existing folders will be skipped
    :param participants:    If True, subjects in participants.tsv will not be processed (this could be used e.g. to protect these subjects from being reprocessed), also when force=True
    :param bidsmapfile:     The name of the bidsmap YAML-file. If the bidsmap pathname is relative (i.e. no "/" in the name) then it is assumed to be located in bidsfolder/code/bidscoin
    :param subprefix:       The prefix common for all source subject-folders
    :param sesprefix:       The prefix common for all source session-folders
    :return:                Nothing
    """

    # Input checking & defaults
    rawfolder = Path(rawfolder).resolve()
    bidsfolder = Path(bidsfolder).resolve()
    bidsmapfile = Path(bidsmapfile)

    # Start logging
    bids.setup_logging(bidsfolder / 'code' / 'bidscoin' / 'bidscoiner.log')
    LOGGER.info('')
    LOGGER.info(
        f"-------------- START BIDScoiner {bids.version()}: BIDS {bids.bidsversion()} ------------"
    )
    LOGGER.info(
        f">>> bidscoiner sourcefolder={rawfolder} bidsfolder={bidsfolder} subjects={subjects} force={force}"
        f" participants={participants} bidsmap={bidsmapfile} subprefix={subprefix} sesprefix={sesprefix}"
    )

    # Create a code/bidscoin subfolder
    (bidsfolder / 'code' / 'bidscoin').mkdir(parents=True, exist_ok=True)

    # Create a dataset description file if it does not exist
    dataset_file = bidsfolder / 'dataset_description.json'
    if not dataset_file.is_file():
        dataset_description = {
            "Name": "REQUIRED. Name of the dataset",
            "BIDSVersion": str(bids.bidsversion()),
            "DatasetType": "raw",
            "License": "RECOMMENDED. The license for the dataset. The use of license name abbreviations is RECOMMENDED for specifying a license. The corresponding full license text MAY be specified in an additional LICENSE file",
            "Authors": ["OPTIONAL. List of individuals who contributed to the creation/curation of the dataset"],
            "Acknowledgements": "OPTIONAL. Text acknowledging contributions of individuals or institutions beyond those listed in Authors or Funding",
            "HowToAcknowledge": "OPTIONAL. Instructions how researchers using this dataset should acknowledge the original authors. This field can also be used to define a publication that should be cited in publications that use the dataset",
            "Funding": ["OPTIONAL. List of sources of funding (grant numbers)"],
            "EthicsApprovals": ["OPTIONAL. List of ethics committee approvals of the research protocols and/or protocol identifiers"],
            "ReferencesAndLinks": ["OPTIONAL. List of references to publications that contain information on the dataset, or links",
                                   "https://github.com/Donders-Institute/bidscoin"],
            "DatasetDOI": "OPTIONAL. The Digital Object Identifier of the dataset (not the corresponding paper)"
        }
        LOGGER.info(f"Creating dataset description file: {dataset_file}")
        with open(dataset_file, 'w') as fid:
            json.dump(dataset_description, fid, indent=4)

    # Create a README file if it does not exist
    readme_file = bidsfolder / 'README'
    if not readme_file.is_file():
        LOGGER.info(f"Creating README file: {readme_file}")
        with open(readme_file, 'w') as fid:
            fid.write(
                f"A free form text (README) describing the dataset in more detail SHOULD be provided\n\n"
                f"The raw BIDS data was created using BIDScoin {bids.version()}\n"
                f"All provenance information and settings can be found in ./code/bidscoin\n"
                f"For more information see: https://github.com/Donders-Institute/bidscoin"
            )

    # Get the bidsmap heuristics from the bidsmap YAML-file
    bidsmap, _ = bids.load_bidsmap(bidsmapfile,
                                   bidsfolder / 'code' / 'bidscoin')
    if not bidsmap:
        LOGGER.error(
            f"No bidsmap file found in {bidsfolder}. Please run the bidsmapper first and / or use the correct bidsfolder"
        )
        return

    # Save options to the .bidsignore file
    bidsignore_items = [
        item.strip()
        for item in bidsmap['Options']['bidscoin']['bidsignore'].split(';')
    ]
    LOGGER.info(f"Writing {bidsignore_items} entries to {bidsfolder / '.bidsignore'}")
    with (bidsfolder / '.bidsignore').open('w') as bidsignore:
        for item in bidsignore_items:
            bidsignore.write(item + '\n')

    # Get the table & dictionary of the subjects that have been processed
    participants_tsv = bidsfolder / 'participants.tsv'
    participants_json = participants_tsv.with_suffix('.json')
    if participants_tsv.is_file():
        participants_table = pd.read_csv(participants_tsv, sep='\t')
        participants_table.set_index(['participant_id'],
                                     verify_integrity=True,
                                     inplace=True)
    else:
        participants_table = pd.DataFrame()
        participants_table.index.name = 'participant_id'
    if participants_json.is_file():
        with participants_json.open('r') as json_fid:
            participants_dict = json.load(json_fid)
    else:
        participants_dict = {
            'participant_id': {
                'Description': 'Unique participant identifier'
            }
        }

    # Get the list of subjects
    if not subjects:
        subjects = bids.lsdirs(rawfolder, subprefix + '*')
        if not subjects:
            LOGGER.warning(f"No subjects found in: {rawfolder/subprefix}*")
    else:
        subjects = [
            subprefix + re.sub(f"^{subprefix}", '', subject)
            for subject in subjects
        ]  # Make sure there is a "sub-" prefix
        subjects = [
            rawfolder / subject for subject in subjects
            if (rawfolder / subject).is_dir()
        ]

    # Loop over all subjects and sessions and convert them using the bidsmap entries
    for n, subject in enumerate(subjects, 1):

        LOGGER.info(
            f"------------------- Subject {n}/{len(subjects)} -------------------"
        )
        if participants and subject.name in list(participants_table.index):
            LOGGER.info(f"Skipping subject: {subject} ({n}/{len(subjects)})")
            continue

        personals = dict()
        sessions = bids.lsdirs(subject, sesprefix + '*')
        if not sessions:
            sessions = [subject]
        for session in sessions:

            # Unpack the data in a temporary folder if it is tarballed/zipped and/or contains a DICOMDIR file
            session, unpacked = bids.unpack(session, subprefix, sesprefix)

            # See what dataformat we have
            dataformat = bids.get_dataformat(session)
            if not dataformat:
                LOGGER.info(f"Skipping unknown session: {session}")
                continue

            # Check if we should skip the session-folder
            if not force:
                subid, sesid = bids.get_subid_sesid(session / 'dum.my',
                                                    subprefix=subprefix,
                                                    sesprefix=sesprefix)
                bidssession = bidsfolder / subid / sesid
                if not bidsmap[dataformat]['session']:
                    bidssession = bidssession.parent
                datatypes = []
                for datatype in bids.lsdirs(bidssession):  # See what datatypes we already have in the bids session-folder
                    if next(datatype.glob('*'), None) and bidsmap[dataformat].get(datatype.name):  # See if the non-empty datatype-folder is one we are going to add data for (NB: a bare glob() generator is always truthy)
                        datatypes.append(datatype.name)
                if datatypes:
                    LOGGER.info(
                        f"Skipping processed session: {bidssession} already has {datatypes} data (use the -f option to overrule)"
                    )
                    continue

            LOGGER.info(f"Coining session: {session}")

            # Update / append the source data mapping
            if dataformat in ('DICOM', 'PAR'):
                coin_data2bids(dataformat, session, bidsmap, bidsfolder,
                               personals, subprefix, sesprefix)

            # Update / append the P7 mapping
            if dataformat == 'P7':
                LOGGER.error(
                    f"{dataformat} not (yet) supported, skipping session: {session}"
                )
                continue

            # Update / append the nifti mapping
            if dataformat == 'Nifti':
                coin_nifti(session, bidsmap, bidsfolder, personals)

            # Update / append the file-system mapping
            if dataformat == 'FileSystem':
                coin_filesystem(session, bidsmap, bidsfolder, personals)

            # Update / append the plugin mapping
            if bidsmap['PlugIns']:
                coin_plugin(session, bidsmap, bidsfolder, personals)

            # Clean-up the temporary unpacked data
            if unpacked:
                shutil.rmtree(session)

        # Store the collected personals in the participant_table
        for key in personals:

            # participant_id is the index of the participants_table
            assert 'participant_id' in personals
            if key == 'participant_id':
                continue

            # TODO: Check that only values that are consistent over sessions go in the participants.tsv file, otherwise put them in a sessions.tsv file

            if key not in participants_dict:
                participants_dict[key] = dict(
                    LongName='Long (unabbreviated) name of the column',
                    Description='Description of the column',
                    Levels=dict(Key='Value (This is for categorical variables: a dictionary of possible values (keys) and their descriptions (values))'),
                    Units='Measurement units. [<prefix symbol>]<unit symbol> format following the SI standard is RECOMMENDED',
                    TermURL='URL pointing to a formal definition of this type of data in an ontology available on the web')
            participants_table.loc[personals['participant_id'],
                                   key] = personals[key]

    # Write the collected data to the participant files
    LOGGER.info(f"Writing subject data to: {participants_tsv}")
    participants_table.replace('', 'n/a').to_csv(participants_tsv,
                                                 sep='\t',
                                                 encoding='utf-8',
                                                 na_rep='n/a')

    LOGGER.info(f"Writing subject data dictionary to: {participants_json}")
    with participants_json.open('w') as json_fid:
        json.dump(participants_dict, json_fid, indent=4)

    LOGGER.info('-------------- FINISHED! ------------')
    LOGGER.info('')

    bids.reporterrors()
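
A hedged invocation of this newer, pathlib-based variant; the paths and prefixes below are illustrative. The subprefix/sesprefix arguments accommodate source trees that do not use the default sub-*/ses-* folder naming:

bidscoiner(rawfolder='/project/sourcedata',
           bidsfolder='/project/bids',
           subprefix='patient-',   # Source subject folders are named e.g. patient-001
           sesprefix='visit-')     # Source session folders are named e.g. visit-01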
Example No. 17
def coin_dicom(session: Path, bidsmap: dict, bidsfolder: Path, personals: dict,
               subprefix: str, sesprefix: str) -> None:
    """
    Converts the session dicom-files into BIDS-valid nifti-files in the corresponding bidsfolder and
    extracts personals (e.g. Age, Sex) from the dicom header

    :param session:     The full-path name of the subject/session source folder
    :param bidsmap:     The full mapping heuristics from the bidsmap YAML-file
    :param bidsfolder:  The full-path name of the BIDS root-folder
    :param personals:   The dictionary with the personal information
    :param subprefix:   The prefix common for all source subject-folders
    :param sesprefix:   The prefix common for all source session-folders
    :return:            Nothing
    """

    if not bids.lsdirs(session):
        LOGGER.warning(f"No run subfolder(s) found in: {session}")
        return

    TE = [None, None]

    # Get valid BIDS subject/session identifiers from the (first) dicom-header or from the session source folder
    subid, sesid = bids.get_subid_sesid(
        bids.get_dicomfile(bids.lsdirs(session)[0]),
        bidsmap['DICOM']['subject'], bidsmap['DICOM']['session'], subprefix,
        sesprefix)
    if subid == subprefix:
        LOGGER.error(f"No valid subject identifier found for: {session}")
        return

    # Create the BIDS session-folder and a scans.tsv file
    bidsses = bidsfolder / subid / sesid
    if bidsses.is_dir():
        LOGGER.warning(
            f"Existing BIDS output-directory found, which may result in duplicate data (with increased run-index). Make sure {bidsses} was cleaned-up from old data before (re)running the bidscoiner"
        )
    bidsses.mkdir(parents=True, exist_ok=True)
    scans_tsv = bidsses / f"{subid}{bids.add_prefix('_',sesid)}_scans.tsv"
    if scans_tsv.is_file():
        scans_table = pd.read_csv(scans_tsv, sep='\t', index_col='filename')
    else:
        scans_table = pd.DataFrame(columns=['acq_time'], dtype='str')
        scans_table.index.name = 'filename'

    # Process all the dicom run subfolders
    for runfolder in bids.lsdirs(session):

        # Get a dicom-file
        dicomfile = bids.get_dicomfile(runfolder)
        if not dicomfile.name: continue

        # Get a matching run from the bidsmap
        run, modality, index = bids.get_matching_run(dicomfile, bidsmap)

        # Check if we should ignore this run
        if modality == bids.ignoremodality:
            LOGGER.info(f"Leaving out: {runfolder}")
            continue

        # Check if we already know this run
        if index is None:
            LOGGER.warning(
                f"Skipping unknown '{modality}': {dicomfile}\n-> re-run the bidsmapper and delete {session} to solve this warning"
            )
            continue

        LOGGER.info(f"Processing: {runfolder}")

        # Create the BIDS session/modality folder
        bidsmodality = bidsses / modality
        bidsmodality.mkdir(parents=True, exist_ok=True)

        # Compose the BIDS filename using the matched run
        bidsname = bids.get_bidsname(subid, sesid, modality, run)
        runindex = run['bids']['run']
        if runindex.startswith('<<') and runindex.endswith('>>'):
            bidsname = bids.increment_runindex(bidsmodality, bidsname)

        # Check if file already exists (-> e.g. when a static runindex is used)
        if (bidsmodality / bidsname).with_suffix('.json').is_file():
            LOGGER.warning(
                f"{bidsmodality/bidsname}.* already exists -- check your results carefully!"
            )

        # Convert the dicom-files in the run folder to nifti's in the BIDS-folder
        command = '{path}dcm2niix {args} -f "{filename}" -o "{outfolder}" "{infolder}"'.format(
            path=bidsmap['Options']['dcm2niix']['path'],
            args=bidsmap['Options']['dcm2niix']['args'],
            filename=bidsname,
            outfolder=bidsmodality,
            infolder=runfolder)
        if not bids.run_command(command):
            continue

        # Replace uncropped output image with the cropped one
        if '-x y' in bidsmap['Options']['dcm2niix']['args']:
            for filename in sorted(bidsmodality.glob(bidsname + '*_Crop_*')):  # e.g. *_Crop_1.nii.gz
                ext = ''.join(filename.suffixes)
                newfilename = str(filename).rsplit(ext, 1)[0].rsplit('_Crop_', 1)[0] + ext
                LOGGER.info(
                    f"Found dcm2niix _Crop_ suffix, replacing original file\n{filename} ->\n{newfilename}"
                )
                filename.replace(newfilename)

        # Rename all files ending with _c%d, _e%d and _ph (and any combination of these): These are produced by dcm2niix for multi-coil data, multi-echo data and phase data, respectively
        jsonfiles = []  # Collect the associated json-files (for updating them later) -- possibly > 1
        for dcm2niisuffix in ('_c', '_e', '_ph', '_i'):
            for filename in sorted(bidsmodality.glob(bidsname + dcm2niisuffix + '*')):
                ext = ''.join(filename.suffixes)
                basepath, index = str(filename).rsplit(ext, 1)[0].rsplit(dcm2niisuffix, 1)  # basepath = the name without the added stuff (i.e. bidsmodality/bidsname), index = added dcm2niix index (e.g. _c1 -> index=1)
                basesuffix = basepath.rsplit('_', 1)[1]  # The BIDS suffix, e.g. basepath = *_magnitude1 -> basesuffix=magnitude1
                index = index.split('_')[0].zfill(2)  # Zero-pad as specified in the BIDS-standard (assuming two digits is sufficient); strip following suffixes (fieldmaps produce *_e2_ph files)

                # This is a special hack: dcm2niix does not always add a _c/_e suffix for the first(?) coil/echo image -> add it when we encounter a *_e2/_c2 file
                if dcm2niisuffix in ('_c', '_e') and int(index) == 2 and basesuffix not in ['magnitude1', 'phase1']:  # For fieldmaps: *_magnitude1_e[index] -> *_magnitude[index] (this is handled below)
                    filename_ce = Path(
                        basepath + ext)  # The file without the _c1/_e1 suffix
                    if dcm2niisuffix == '_e' and bids.get_bidsvalue(
                            basepath, 'echo'):
                        newbasepath_ce = Path(
                            bids.get_bidsvalue(basepath, 'echo', '1'))
                    else:
                        newbasepath_ce = Path(
                            bids.get_bidsvalue(
                                basepath, 'dummy',
                                dcm2niisuffix.upper() + '1'.zfill(len(index)))
                        )  # --> append to acq-label, may need to be elaborated for future BIDS standards, supporting multi-coil data
                    newfilename_ce = newbasepath_ce.with_suffix(
                        ext)  # The file as it should have been
                    if filename_ce.is_file():
                        if filename_ce != newfilename_ce:
                            LOGGER.info(
                                f"Found no dcm2niix {dcm2niisuffix} suffix for image instance 1, renaming\n{filename_ce} ->\n{newfilename_ce}"
                            )
                            filename_ce.replace(newfilename_ce)
                        if ext == '.json':
                            jsonfiles.append(
                                newbasepath_ce.with_suffix('.json'))

                # Patch the basepath with the dcm2niix suffix info (we can't rely on the basepath info here because Siemens can e.g. put multiple echos in one series / run-folder)
                if dcm2niisuffix == '_e' and bids.get_bidsvalue(
                        basepath, 'echo') and index:
                    basepath = bids.get_bidsvalue(
                        basepath, 'echo', str(int(index))
                    )  # In contrast to other labels, run and echo labels MUST be integers. Those labels MAY include zero padding, but this is NOT RECOMMENDED to maintain their uniqueness

                elif dcm2niisuffix == '_e' and basesuffix in ('magnitude1', 'magnitude2') and index:  # i.e. modality == 'fmap'
                    basepath = basepath[0:-1] + str(int(index))  # basepath: *_magnitude1_e[index] -> *_magnitude[index]
                    # Collect the echo times that need to be added to the json-file (see below)
                    if filename.suffix == '.json':
                        with filename.open('r') as json_fid:
                            data = json.load(json_fid)
                        TE[int(index) - 1] = data['EchoTime']
                        LOGGER.info(
                            f"Collected EchoTime{index} = {data['EchoTime']} from: {filename}"
                        )
                elif dcm2niisuffix == '_e' and basesuffix == 'phasediff' and index:  # i.e. modality == 'fmap'
                    pass

                elif dcm2niisuffix == '_e' and basesuffix in ['phase1', 'phase2'] and index:  # i.e. modality == 'fmap'
                    basepath = basepath[0:-1] + str(int(index))  # basepath: *_phase1_e[index]_ph -> *_phase[index]

                else:
                    basepath = bids.get_bidsvalue(
                        basepath, 'dummy',
                        dcm2niisuffix.upper() + index
                    )  # --> append to acq-label, may need to be elaborated for future BIDS standards, supporting multi-coil data

                # Save the file with a new name
                newbidsname = str(Path(basepath).name)
                if runindex.startswith('<<') and runindex.endswith('>>'):
                    newbidsname = bids.increment_runindex(
                        bidsmodality, newbidsname, ext
                    )  # Update the runindex now that the acq-label has changed
                newfilename = (bidsmodality / newbidsname).with_suffix(ext)
                LOGGER.info(
                    f"Found dcm2niix {dcm2niisuffix} suffix, renaming\n{filename} ->\n{newfilename}"
                )
                filename.replace(newfilename)
                if ext == '.json':
                    jsonfiles.append(
                        (bidsmodality / newbidsname).with_suffix('.json'))

        # Loop over and adapt all the newly produced json files and write to the scans.tsv file (every nifti-file comes with a json-file)
        if not jsonfiles:
            jsonfiles = [(bidsmodality / bidsname).with_suffix('.json')]
        for jsonfile in set(jsonfiles):

            # Check if dcm2niix behaved as expected
            if not jsonfile.is_file():
                LOGGER.error(
                    f"Unexpected file conversion result: {jsonfile} not found")
                continue

            # Add a dummy b0 bval- and bvec-file for any file without a bval/bvec file (e.g. sbref, b0 scans)
            if modality == 'dwi':
                bvecfile = jsonfile.with_suffix('.bvec')
                bvalfile = jsonfile.with_suffix('.bval')
                if not bvecfile.is_file():
                    LOGGER.info(f"Adding dummy bvec file: {bvecfile}")
                    with bvecfile.open('w') as bvec_fid:
                        bvec_fid.write('0\n0\n0\n')
                if not bvalfile.is_file():
                    LOGGER.info(f"Adding dummy bval file: {bvalfile}")
                    with bvalfile.open('w') as bval_fid:
                        bval_fid.write('0\n')

            # Add the TaskName to the func json-file
            elif modality == 'func':
                with jsonfile.open('r') as json_fid:
                    data = json.load(json_fid)
                if 'TaskName' not in data:
                    LOGGER.info(f"Adding TaskName to: {jsonfile}")
                    data['TaskName'] = run['bids']['task']
                    with jsonfile.open('w') as json_fid:
                        json.dump(data, json_fid, indent=4)

            # Add the EchoTime(s) used to create the difference image to the fmap json-file. NB: This assumes the magnitude runs have already been parsed (i.e. their nifti's had an _e suffix) -- This is normally the case for Siemens (phase-runs being saved after the magnitude runs)
            elif modality == 'fmap':
                if run['bids']['suffix'] == 'phasediff':
                    LOGGER.info(
                        f"Adding EchoTime1: {TE[0]} and EchoTime2: {TE[1]} to {jsonfile}"
                    )
                    if TE[0] is None or TE[1] is None:
                        LOGGER.warning(
                            f"Missing Echo-Time data for: {jsonfile}")
                    elif TE[0] > TE[1]:
                        LOGGER.warning(
                            f"EchoTime1 > EchoTime2 for: {jsonfile}")
                    with jsonfile.open('r') as json_fid:
                        data = json.load(json_fid)
                    data['EchoTime1'] = TE[0]
                    data['EchoTime2'] = TE[1]
                    with jsonfile.open('w') as json_fid:
                        json.dump(data, json_fid, indent=4)

            # Parse the acquisition time from the json file or else from the dicom header (NB: assuming the dicom file represents the first acquisition)
            with jsonfile.open('r') as json_fid:
                data = json.load(json_fid)
            if 'AcquisitionTime' not in data:
                data['AcquisitionTime'] = bids.get_dicomfield(
                    'AcquisitionTime', dicomfile)
            acq_time = dateutil.parser.parse(data['AcquisitionTime'])
            scanpath = list(jsonfile.parent.glob(jsonfile.stem + '.nii*'))[0].relative_to(bidsses)  # Find the corresponding nifti file (there should be only one, let's not make assumptions about the .gz extension)
            scans_table.loc[
                scanpath.as_posix(),
                'acq_time'] = '1900-01-01T' + acq_time.strftime('%H:%M:%S')

    # Write the scans_table to disk
    LOGGER.info(f"Writing acquisition time data to: {scans_tsv}")
    scans_table.sort_values(by=['acq_time', 'filename'], inplace=True)
    scans_table.to_csv(scans_tsv, sep='\t', encoding='utf-8')

    # Search for the IntendedFor images and add them to the json-files. This has been postponed until all modalities have been processed (i.e. so that all target images are indeed on disk)
    if bidsmap['DICOM']['fmap'] is not None:
        for fieldmap in bidsmap['DICOM']['fmap']:
            bidsname = bids.get_bidsname(subid, sesid, 'fmap', fieldmap)
            niifiles = []
            intendedfor = fieldmap['bids']['IntendedFor']

            # Search for the imaging files that match the IntendedFor search criteria
            if intendedfor:
                if intendedfor.startswith('<<') and intendedfor.endswith('>>'):
                    intendedfor = intendedfor[2:-2].split('><')
                elif not isinstance(intendedfor, list):
                    intendedfor = [intendedfor]
                for selector in intendedfor:
                    niifiles.extend(
                        [
                            Path(niifile).relative_to(bidsfolder / subid)
                            for niifile in sorted(
                                bidsses.rglob(f"*{selector}*.nii*"))
                            if selector
                        ]
                    )  # Search in all runs using a relative path to the subject folder
            else:
                intendedfor = []

            # Save the IntendedFor data in the json-files (account for multiple runs and dcm2niix suffixes inserted into the acquisition label)
            acqlabel = bids.get_bidsvalue(bidsname, 'acq')
            for jsonfile in list((bidsses/'fmap').glob(bidsname.replace('_run-1_', '_run-[0-9]*_') + '.json')) + \
                            list((bidsses/'fmap').glob(bidsname.replace('_run-1_', '_run-[0-9]*_').replace(acqlabel, acqlabel+'[CE][0-9]*') + '.json')):

                if niifiles:
                    LOGGER.info(f"Adding IntendedFor to: {jsonfile}")
                elif intendedfor:
                    LOGGER.warning(
                        f"Empty 'IntendedFor' fieldmap value in {jsonfile}: the search for {intendedfor} gave no results"
                    )
                else:
                    LOGGER.warning(
                        f"Empty 'IntendedFor' fieldmap value in {jsonfile}: the IntendedFor value of the bidsmap entry was empty"
                    )
                with jsonfile.open('r') as json_fid:
                    data = json.load(json_fid)
                data['IntendedFor'] = [
                    niifile.as_posix() for niifile in niifiles
                ]  # The path needs to use forward slashes instead of backward slashes
                with jsonfile.open('w') as json_fid:
                    json.dump(data, json_fid, indent=4)

                # Catch magnitude2 and phase2 files produced by dcm2niix (i.e. magnitude1 & magnitude2 both in the same runfolder)
                if jsonfile.name.endswith(
                        'magnitude1.json') or jsonfile.name.endswith(
                            'phase1.json'):
                    jsonfile2 = jsonfile.with_name(
                        jsonfile.name.rsplit('1.json', 1)[0] + '2.json')
                    if jsonfile2.is_file():
                        with jsonfile2.open('r') as json_fid:
                            data = json.load(json_fid)
                        if 'IntendedFor' not in data:
                            if niifiles:
                                LOGGER.info(
                                    f"Adding IntendedFor to: {jsonfile2}")
                            else:
                                LOGGER.warning(
                                    f"Empty 'IntendedFor' fieldmap value in {jsonfile2}: the search for {intendedfor} gave no results"
                                )
                            data['IntendedFor'] = [
                                niifile.as_posix() for niifile in niifiles
                            ]  # The path needs to use forward slashes instead of backward slashes
                            with jsonfile2.open('w') as json_fid:
                                json.dump(data, json_fid, indent=4)

    # Collect personal data from the DICOM header: only from the first session (-> BIDS specification)
    if 'runfolder' in locals():
        dicomfile = bids.get_dicomfile(runfolder)
        personals['participant_id'] = subid
        if sesid:
            if 'session_id' not in personals:
                personals['session_id'] = sesid
            else:
                return
        age = bids.get_dicomfield('PatientAge', dicomfile)  # A string of characters with one of the following formats: nnnD, nnnW, nnnM, nnnY
        if age.endswith('D'):
            personals['age'] = str(int(float(age.rstrip('D')) / 365.2425))
        elif age.endswith('W'):
            personals['age'] = str(int(float(age.rstrip('W')) / 52.1775))
        elif age.endswith('M'):
            personals['age'] = str(int(float(age.rstrip('M')) / 12))
        elif age.endswith('Y'):
            personals['age'] = str(int(float(age.rstrip('Y'))))
        elif age:
            personals['age'] = age
        personals['sex'] = bids.get_dicomfield('PatientSex', dicomfile)
        personals['size'] = bids.get_dicomfield('PatientSize', dicomfile)
        personals['weight'] = bids.get_dicomfield('PatientWeight', dicomfile)
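
The fmap branch above patches EchoTime1/EchoTime2 into the phasediff sidecar with a plain JSON read-modify-write. A minimal standalone sketch of that pattern, with a hypothetical file name and illustrative echo times (not taken from the example):

import json
from pathlib import Path

jsonfile = Path('sub-001_phasediff.json')  # Hypothetical sidecar file
if jsonfile.is_file():
    with jsonfile.open('r') as json_fid:
        data = json.load(json_fid)
    data['EchoTime1'] = 0.00492  # The shorter echo time comes first
    data['EchoTime2'] = 0.00738
    with jsonfile.open('w') as json_fid:
        json.dump(data, json_fid, indent=4)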
Example No. 18
def rawmapper(rawfolder,
              outfolder=None,
              rename=False,
              dicomfield=('PatientComments', ),
              wildcard='*',
              dryrun=False):
    """
    :param str rawfolder:   The root folder-name of the sub/ses/data/file tree containing the source data files
    :param str outfolder:   The name of the folder where the mapping-file is saved (default = rawfolder)
    :param bool rename:     Flag for renaming the sub-subid folders to sub-dicomfield
    :param list dicomfield: The names of the dicomfields that are mapped (/ renamed to sub-dcmval/ses-dcmval)
    :param str wildcard:    The Unix style pathname pattern expansion that is used by glob to select the series from which the dicomfield is being mapped
    :param bool dryrun:     Flag for dry-running renaming the sub-subid folders
    :return:                Nothing
    :rtype: NoneType
    """

    # Input checking
    if not outfolder:
        outfolder = rawfolder
    rawfolder = os.path.abspath(os.path.expanduser(rawfolder))
    outfolder = os.path.abspath(os.path.expanduser(outfolder))

    # Create an output mapper-file
    if not dryrun:
        mapperfile = os.path.join(
            outfolder, 'rawmapper_{}.tsv'.format('_'.join(dicomfield)))
        if rename:
            with open(mapperfile, 'a') as fid:
                fid.write('{}\t{}\t{}\t{}\n'.format('subid', 'sesid',
                                                    'newsubid', 'newsesid'))
        else:
            with open(mapperfile, 'x') as fid:
                fid.write('{}\t{}\t{}\t{}\n'.format('subid', 'sesid',
                                                    'seriesname',
                                                    '\t'.join(dicomfield)))

    # Loop over all subjects and sessions in the rawfolder
    for subject in bids.lsdirs(rawfolder, 'sub-*'):

        sessions = bids.lsdirs(subject, 'ses-*')
        if not sessions: sessions = [subject]
        for session in sessions:

            # Get the subject and session identifiers from the raw folder
            subid = 'sub-' + session.rsplit(os.sep + 'sub-', 1)[1].split(
                os.sep + 'ses-', 1)[0]
            sesid = 'ses-' + session.rsplit(os.sep + 'ses-', 1)[1] if os.sep + 'ses-' in session else ''

            # Parse the new subject and session identifiers from the dicomfield
            series = bids.lsdirs(session, wildcard)
            if not series:
                series = ''
                dcmval = ''
            else:
                series = series[0]  # TODO: loop over series?
                dcmval = ''
                for dcmfield in dicomfield:
                    dcmval = dcmval + '/' + str(bids.get_dicomfield(dcmfield, bids.get_dicomfile(series)))  # TODO: test how newlines from the console work out
                dcmval = dcmval[1:]

            # Rename the session subfolder in the rawfolder and print & save this info
            if rename:

                # Get the new subid and sesid
                if not dcmval or dcmval == 'None':
                    warnings.warn(
                        'Skipping renaming because the dicom-field was empty for: '
                        + session)
                    continue
                else:
                    if '/' in dcmval:  # Allow for different sub/ses delimiters that could be entered at the console (i.e. in PatientComments)
                        delim = '/'
                    elif '\\' in dcmval:
                        delim = '\\'
                    else:
                        delim = '\n'
                    newsubsesid = dcmval.split(delim)
                    newsubid = 'sub-' + bids.cleanup_label(
                        newsubsesid[0].replace('sub-', ''))
                    if newsubid == 'sub-' or newsubid == 'sub-None':
                        newsubid = subid
                        warnings.warn(
                            'Could not rename {} because the dicom-field was empty for: {}'
                            .format(subid, session))
                    if len(newsubsesid) == 1:
                        newsesid = sesid
                    elif len(newsubsesid) == 2:
                        newsesid = 'ses-' + bids.cleanup_label(
                            newsubsesid[1].replace('ses-', ''))
                    else:
                        warnings.warn(
                            'Skipping renaming of {} because the dicom-field "{}" could not be parsed into [subid, sesid]'
                            .format(session, dcmval))
                        continue
                    if newsesid == 'ses-' or newsesid == 'ses-None':
                        newsesid = sesid
                        warnings.warn(
                            'Could not rename {} because the dicom-field was empty for: {}'
                            .format(sesid, session))

                # Save the dicomfield / sub-ses mapping to disk and rename the session subfolder (but skip if it already exists)
                newsession = os.path.join(rawfolder, newsubid, newsesid)
                print(session + ' -> ' + newsession)
                if newsession == session:
                    continue
                if os.path.isdir(newsession):
                    warnings.warn(
                        '{} already exists, skipping renaming of {}'.format(
                            newsession, session))
                elif not dryrun:
                    with open(mapperfile, 'a') as fid:
                        fid.write('{}\t{}\t{}\t{}\n'.format(
                            subid, sesid, newsubid, newsesid))
                    os.renames(session, newsession)

            # Print & save the dicom values
            else:
                print('{}{}{}\t-> {}'.format(subid + os.sep, sesid + os.sep,
                                             os.path.basename(series),
                                             '\t'.join(dcmval.split('/'))))
                if not dryrun:
                    with open(mapperfile, 'a') as fid:
                        fid.write('{}\t{}\t{}\t{}\n'.format(
                            subid, sesid, os.path.basename(series),
                            '\t'.join(dcmval.split('/'))))
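
A hedged dry-run example of rawmapper() above; the folder name is illustrative. With dryrun=True the sub/ses -> PatientComments mapping is only printed: nothing is renamed and no mapper-file is written:

rawmapper(rawfolder='/project/raw',
          dicomfield=('PatientComments',),
          rename=True,
          dryrun=True)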
Example No. 19
def echocombine(bidsdir: str,
                pattern: str,
                subjects: list,
                output: str,
                algorithm: str,
                weights: list,
                force: bool = False):
    """

    :param bidsdir:     The bids-directory with the (multi-echo) subject data
    :param pattern:     Globlike recursive search pattern (relative to the subject/session folder) to select the first echo of the images that need to be combined, e.g. '*task-*echo-1*'
    :param subjects:    List of sub-# identifiers to be processed (the sub- prefix can be left out). If not specified then all sub-folders in the bidsfolder will be processed
    :param output:      Determines where the output is saved. It can be the name of a BIDS datatype folder, such as 'func', or of the derivatives folder, i.e. 'derivatives'. If output = [the name of the input datatype folder] then the original echo images are replaced by one combined image. If output is left empty then the combined image is saved in the input datatype folder and the original echo images are moved to the {bids.unknowndatatype} folder
    :param algorithm:   Combination algorithm, either 'PAID', 'TE' or 'average'
    :param weights:     Weights for each echo
    :param force:       Boolean to overwrite existing ME target files
    :return:
    """

    # Input checking
    bidsdir = Path(bidsdir).resolve()

    # Start logging
    bids.setup_logging(bidsdir / 'code' / 'bidscoin' / 'echocombine.log')
    LOGGER.info('')
    LOGGER.info(f"--------- START echocombine ---------")
    LOGGER.info(
        f">>> echocombine bidsfolder={bidsdir} pattern={pattern} subjects={subjects} output={output}"
        f" algorithm={algorithm} weights={weights}")

    if 'echo' not in pattern:
        LOGGER.warning(
            f"Missing 'echo-#' substring in glob-like search pattern, i.e. '{pattern}' does not seem to select the first echo"
        )

    # Get the list of subjects
    if not subjects:
        subjects = bids.lsdirs(bidsdir, 'sub-*')
        if not subjects:
            LOGGER.warning(f"No subjects found in: {bidsdir/'sub-*'}")
    else:
        subjects = ['sub-' + re.sub('^sub-', '', subject) for subject in subjects]  # Make sure there is a "sub-" prefix (NB: str.replace('^sub-', ...) would treat the regex as a literal string)
        subjects = [
            bidsdir / subject for subject in subjects
            if (bidsdir / subject).is_dir()
        ]

    # Loop over bids subject/session-directories
    for n, subject in enumerate(subjects, 1):

        sessions = bids.lsdirs(subject, 'ses-*')
        if not sessions:
            sessions = [subject]
        for session in sessions:

            LOGGER.info('-------------------------------------')
            LOGGER.info(
                f"Combining echos for ({n}/{len(subjects)}): {session}")

            sub_id, ses_id = bids.get_subid_sesid(session / 'dum.my')

            # Search for multi-echo matches
            for match in sorted([
                    match for match in session.rglob(pattern)
                    if '.nii' in match.suffixes
            ]):

                # Check if it is normal/BIDS multi-echo data
                datatype = match.parent.name
                echonr = bids.get_bidsvalue(match, 'echo')
                mepattern = bids.get_bidsvalue(match, 'echo', '*')
                echos = sorted(match.parent.glob(mepattern.name))
                newechos = [
                    echo.parents[1] / bids.unknowndatatype / echo.name
                    for echo in echos
                ]
                if not echonr:
                    LOGGER.warning(
                        f"No 'echo' key-value pair found in the filename, skipping: {match}"
                    )
                    continue
                if len(echos) == 1:
                    LOGGER.warning(
                        f"Only one echo image found, nothing to do for: {match}"
                    )
                    continue

                # Construct the combined-echo output filename and check if that file already exists
                cename = match.name.replace(f"_echo-{echonr}", '')
                if not output:
                    cefile = session / datatype / cename
                elif output == 'derivatives':
                    cefile = bidsdir / 'derivatives' / 'multiecho' / sub_id / ses_id / datatype / cename
                else:
                    cefile = session / output / cename
                cefile.parent.mkdir(parents=True, exist_ok=True)
                if cefile.is_file() and not force:
                    LOGGER.warning(
                        f"Outputfile {cefile} already exists, skipping: {match}"
                    )
                    continue

                # Combine the multi-echo images
                me.me_combine(mepattern,
                              cefile,
                              algorithm,
                              weights,
                              saveweights=False,
                              logger=LOGGER.name)

                # (Re)move the original multi-echo images
                if not output:
                    for echo, newecho in zip(echos, newechos):
                        LOGGER.info(
                            f"Moving original echo image: {echo} -> {newecho}")
                        newecho.parent.mkdir(parents=True, exist_ok=True)
                        echo.replace(newecho)
                        echo.with_suffix('').with_suffix('.json').replace(
                            newecho.with_suffix('').with_suffix('.json'))
                elif output == datatype:
                    for echo in echos:
                        LOGGER.info(f"Removing original echo image: {echo}")
                        echo.unlink()
                        echo.with_suffix('').with_suffix('.json').unlink()

                # Construct relative path names as they are used in BIDS
                echos_rel = [str(echo.relative_to(session)) for echo in echos]
                newechos_rel = [
                    str(echo.relative_to(session)) for echo in newechos
                ]
                if output != 'derivatives':
                    cefile_rel = str(cefile.relative_to(session))

                # Update the IntendedFor fields in the fieldmap sidecar files (i.e. remove the old echos, add the echo-combined image and, optionally, the new echos)
                if output != 'derivatives' and (session / 'fmap').is_dir():
                    for fmap in (session / 'fmap').glob('*.json'):
                        with fmap.open('r') as fmap_fid:
                            fmap_data = json.load(fmap_fid)
                        if 'IntendedFor' in fmap_data:
                            intendedfor = fmap_data['IntendedFor']
                            if isinstance(intendedfor, str):
                                intendedfor = [intendedfor]
                            if echos_rel[0] in intendedfor:
                                LOGGER.info(
                                    f"Updating 'IntendedFor' to {cefile_rel} in {fmap}"
                                )
                                if not output:
                                    intendedfor = [file for file in intendedfor if file not in echos_rel] + [cefile_rel] + newechos_rel
                                elif output == datatype:
                                    intendedfor = [file for file in intendedfor if file not in echos_rel] + [cefile_rel]
                                else:
                                    intendedfor = intendedfor + [cefile_rel]
                                fmap_data['IntendedFor'] = intendedfor
                                with fmap.open('w') as fmap_fid:
                                    json.dump(fmap_data, fmap_fid, indent=4)

                # Update the scans.tsv file
                if (bidsdir / '.bidsignore').is_file():
                    with (bidsdir / '.bidsignore').open('r') as fid:
                        bidsignore = fid.read().splitlines()
                else:
                    bidsignore = [bids.unknowndatatype + '/']
                bidsignore.append('derivatives/')
                scans_tsv = session / f"{sub_id}{bids.add_prefix('_',ses_id)}_scans.tsv"
                if output + '/' not in bidsignore and scans_tsv.is_file():

                    LOGGER.info(f"Adding {cefile_rel} to {scans_tsv}")
                    scans_table = pd.read_csv(scans_tsv,
                                              sep='\t',
                                              index_col='filename')
                    scans_table.loc[cefile_rel] = scans_table.loc[echos_rel[0]]

                    for echo, newecho in zip(echos_rel, newechos_rel):
                        if not output:
                            LOGGER.info(
                                f"Updating {echo} -> {newecho} in {scans_tsv}")
                            scans_table.loc[newecho] = scans_table.loc[echo]
                            scans_table.drop(echo, inplace=True)
                        elif output == datatype:
                            LOGGER.info(f"Removing {echo} from {scans_tsv}")
                            scans_table.drop(echo, inplace=True)

                    scans_table.sort_values(by=['acq_time', 'filename'],
                                            inplace=True)
                    scans_table.to_csv(scans_tsv, sep='\t', encoding='utf-8')

    LOGGER.info('-------------- FINISHED! -------------')
    LOGGER.info('')
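
A hedged example call of echocombine() above; the bids-directory and search pattern are illustrative. With output='derivatives' the combined images are written to derivatives/multiecho/ and the original echo images are left in place:

echocombine(bidsdir='/project/bids',
            pattern='func/*task-rest*echo-1*',
            subjects=[],            # An empty list selects all subjects in the bidsfolder
            output='derivatives',
            algorithm='PAID',
            weights=[])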
Example No. 20
def deface(bidsdir: str, pattern: str, subjects: list, output: str, cluster: bool, nativespec: str, kwargs: dict):
    """

    :param bidsdir:     The bids-directory with the (multi-echo) subject data
    :param pattern:     Globlike search pattern (relative to the subject/session folder) to select the images that need to be defaced, e.g. 'anat/*_T1w*'
    :param subjects:    List of sub-# identifiers to be processed (the sub- prefix can be left out). If not specified then all sub-folders in the bidsfolder will be processed
    :param output:      Determines where the defaced images are saved. It can be the name of a BIDS datatype folder, such as 'anat', or of the derivatives folder, i.e. 'derivatives'. If output is left empty then the original images are replaced by the defaced images
    :param cluster:     Flag to submit the deface jobs to the high-performance compute (HPC) cluster
    :param nativespec:  DRMAA native specifications for submitting deface jobs to the HPC cluster
    :param kwargs:      Additional arguments (in dict/json-style) that are passed to pydeface. See examples for usage
    :return:
    """

    # Input checking
    bidsdir = Path(bidsdir).resolve()

    # Start logging
    bids.setup_logging(bidsdir/'code'/'bidscoin'/'deface.log')
    LOGGER.info('')
    LOGGER.info('------------ START deface ------------')
    LOGGER.info(f">>> deface bidsfolder={bidsdir} pattern={pattern} subjects={subjects} output={output}"
                f" cluster={cluster} nativespec={nativespec} {kwargs}")

    # Get the list of subjects
    if not subjects:
        subjects = bids.lsdirs(bidsdir, 'sub-*')
        if not subjects:
            LOGGER.warning(f"No subjects found in: {bidsdir/'sub-*'}")
    else:
        subjects = ['sub-' + re.sub('^sub-', '', subject) for subject in subjects]          # Make sure there is a "sub-" prefix (str.replace() would treat the '^' anchor literally)
        subjects = [bidsdir/subject for subject in subjects if (bidsdir/subject).is_dir()]

    # Prepare the HPC job submission
    with drmaa.Session() as pbatch:
        if cluster:
            jt                     = pbatch.createJobTemplate()
            jt.jobEnvironment      = os.environ
            jt.remoteCommand       = shutil.which('pydeface')
            jt.nativeSpecification = nativespec
            jt.joinFiles           = True

        # Loop over bids subject/session-directories
        for n, subject in enumerate(subjects, 1):

            sessions = bids.lsdirs(subject, 'ses-*')
            if not sessions:
                sessions = [subject]
            for session in sessions:

                LOGGER.info('--------------------------------------')
                LOGGER.info(f"Processing ({n}/{len(subjects)}): {session}")

                sub_id, ses_id = bids.get_subid_sesid(session/'dum.my')    # Append a dummy filename so that the sub/ses labels are parsed from the session path

                # Search for images that need to be defaced
                for match in sorted([match for match in session.glob(pattern) if '.nii' in match.suffixes]):

                    # Construct the output filename and relative path name (used in BIDS)
                    match_rel = str(match.relative_to(session))
                    if not output:
                        outputfile     = match
                        outputfile_rel = match_rel
                    elif output == 'derivatives':
                        outputfile     = bidsdir/'derivatives'/'deface'/sub_id/ses_id/match.parent.name/match.name
                        outputfile_rel = str(outputfile.relative_to(bidsdir))
                    else:
                        outputfile     = session/output/match.name
                        outputfile_rel = str(outputfile.relative_to(session))
                    outputfile.parent.mkdir(parents=True, exist_ok=True)

                    # Deface the image
                    LOGGER.info(f"Defacing: {match_rel} -> {outputfile_rel}")
                    if cluster:
                        jt.args    = [str(match), '--outfile', str(outputfile), '--force'] + \
                                     [item for pair in [[f"--{key}", val] for key, val in kwargs.items()] for item in pair]     # Flatten the kwargs into a ['--key', value, ...] argument list
                        jt.jobName = f"pydeface_{sub_id}_{ses_id}"
                        jobid      = pbatch.runJob(jt)
                        LOGGER.info(f"Your deface job has been submitted with ID: {jobid}")
                    else:
                        pdu.deface_image(str(match), str(outputfile), force=True, forcecleanup=True, **kwargs)

                    # Overwrite or add a json sidecar-file
                    inputjson  = match.with_suffix('').with_suffix('.json')
                    outputjson = outputfile.with_suffix('').with_suffix('.json')
                    if inputjson.is_file() and inputjson != outputjson:
                        if outputjson.is_file():
                            LOGGER.info(f"Overwriting the json sidecar-file: {outputjson}")
                            outputjson.unlink()
                        else:
                            LOGGER.info(f"Adding a json sidecar-file: {outputjson}")
                        shutil.copyfile(inputjson, outputjson)

                    # Add a custom "Defaced" field to the json sidecar-file
                    data = {}                                   # Start from an empty dict if there is no sidecar-file (yet)
                    if outputjson.is_file():
                        with outputjson.open('r') as output_fid:
                            data = json.load(output_fid)
                    data['Defaced'] = True
                    with outputjson.open('w') as output_fid:
                        json.dump(data, output_fid, indent=4)

                    # Update the IntendedFor fields in the fieldmap sidecar-files
                    if output and output != 'derivatives' and (session/'fmap').is_dir():
                        for fmap in (session/'fmap').glob('*.json'):
                            with fmap.open('r') as fmap_fid:
                                fmap_data = json.load(fmap_fid)
                            intendedfor = fmap_data.get('IntendedFor', [])
                            if isinstance(intendedfor, str):
                                intendedfor = [intendedfor]
                            if match_rel in intendedfor:
                                LOGGER.info(f"Updating 'IntendedFor' to {outputfile_rel} in {fmap}")
                                fmap_data['IntendedFor'] = intendedfor + [outputfile_rel]
                                with fmap.open('w') as fmap_fid:
                                    json.dump(fmap_data, fmap_fid, indent=4)

                    # Update the scans.tsv file
                    if (bidsdir/'.bidsignore').is_file():
                        with (bidsdir/'.bidsignore').open('r') as fid:
                            bidsignore = fid.read().splitlines()
                    else:
                        bidsignore = [bids.unknowndatatype + '/']
                    bidsignore.append('derivatives/')
                    scans_tsv = session/f"{sub_id}{bids.add_prefix('_',ses_id)}_scans.tsv"
                    if output and output+'/' not in bidsignore and scans_tsv.is_file():
                        LOGGER.info(f"Adding {outputfile_rel} to {scans_tsv}")
                        scans_table                     = pd.read_csv(scans_tsv, sep='\t', index_col='filename')
                        scans_table.loc[outputfile_rel] = scans_table.loc[match_rel]
                        scans_table.sort_values(by=['acq_time','filename'], inplace=True)
                        scans_table.to_csv(scans_tsv, sep='\t', encoding='utf-8')

        if cluster:
            LOGGER.info('Waiting for the deface jobs to finish...')
            pbatch.synchronize(jobIds=[pbatch.JOB_IDS_SESSION_ALL], timeout=pbatch.TIMEOUT_WAIT_FOREVER, dispose=True)
            pbatch.deleteJobTemplate(jt)


    LOGGER.info('-------------- FINISHED! -------------')
    LOGGER.info('')
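A minimal usage sketch for the deface() function above; the paths, subject list and extra pydeface option are made-up examples, not values from the source:

deface(bidsdir='/data/mystudy/bids',        # Hypothetical BIDS folder
       pattern='anat/*_T1w*',
       subjects=[],                         # Empty list: process all sub-* folders
       output='derivatives',                # Keep the originals, save defaced copies under derivatives/
       cluster=False,                       # Run locally instead of submitting to the HPC cluster
       nativespec='',
       kwargs={'cost': 'mutualinfo'})       # Example extra argument passed on to pydeface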
Example No. 21
def bidsmapper(rawfolder: str, bidsfolder: str, bidsmapfile: str, templatefile: str, subprefix: str='sub-', sesprefix: str='ses-', store: bool=False, interactive: bool=True) -> None:
    """
    Main function that processes all the subjects and sessions in the sourcefolder
    and that generates a maximally filled-in bidsmap.yaml file in bidsfolder/code/bidscoin.
    Folders in sourcefolder are assumed to contain a single dataset.

    :param rawfolder:       The root folder-name of the sub/ses/data/file tree containing the source data files
    :param bidsfolder:      The name of the BIDS root folder
    :param bidsmapfile:     The name of the bidsmap YAML-file
    :param templatefile:    The name of the bidsmap template YAML-file
    :param subprefix:       The prefix common for all source subject-folders
    :param sesprefix:       The prefix common for all source session-folders
    :param store:           If True, the provenance samples will be stored
    :param interactive:     If True, the user will be asked for help if an unknown run is encountered
    :return:            Nothing (the mapped bidsmap YAML-file is saved to disk)
    """

    # Input checking
    rawfolder      = Path(rawfolder).resolve()
    bidsfolder     = Path(bidsfolder).resolve()
    bidsmapfile    = Path(bidsmapfile)
    templatefile   = Path(templatefile)
    bidscoinfolder = bidsfolder/'code'/'bidscoin'

    # Start logging
    bids.setup_logging(bidscoinfolder/'bidsmapper.log')
    LOGGER.info('')
    LOGGER.info('-------------- START BIDSmapper ------------')
    LOGGER.info(f">>> bidsmapper sourcefolder={rawfolder} bidsfolder={bidsfolder} bidsmap={bidsmapfile} "
                f" template={templatefile} subprefix={subprefix} sesprefix={sesprefix} store={store} interactive={interactive}")

    # Get the heuristics for filling the new bidsmap
    bidsmap_old, _ = bids.load_bidsmap(bidsmapfile,  bidscoinfolder)
    template, _    = bids.load_bidsmap(templatefile, bidscoinfolder)

    # Create the new bidsmap as a copy / bidsmap skeleton with no datatype entries (i.e. bidsmap with empty lists)
    if bidsmap_old:
        bidsmap_new = copy.deepcopy(bidsmap_old)
    else:
        bidsmap_new = copy.deepcopy(template)
    for logic in ('DICOM', 'PAR', 'P7', 'Nifti', 'FileSystem'):
        for datatype in bids.bidsdatatypes + (bids.unknowndatatype, bids.ignoredatatype):
            if bidsmap_new.get(logic) and datatype in bidsmap_new[logic]:
                bidsmap_new[logic][datatype] = None

    # Start with an empty skeleton if we didn't have an old bidsmap
    if not bidsmap_old:
        bidsmap_old = copy.deepcopy(bidsmap_new)

    # Start the Qt-application
    gui = interactive
    if gui:
        app = QApplication(sys.argv)
        app.setApplicationName(f"{bidsmapfile} - BIDS editor {bids.version()}")
        mainwin = bidseditor.MainWindow()
        gui = bidseditor.Ui_MainWindow()
        gui.interactive = interactive
        gui.subprefix = subprefix
        gui.sesprefix = sesprefix

        if gui.interactive == 2:
            QMessageBox.information(mainwin, 'BIDS mapping workflow',
                                    f"The bidsmapper will now scan {bidsfolder} and whenever "
                                    f"it detects a new type of scan it will ask you to identify it.\n\n"
                                    f"It is important that you choose the correct BIDS datatype "
                                    f"(e.g. 'anat', 'dwi' or 'func') and suffix (e.g. 'bold' or 'sbref').\n\n"
                                    f"At the end you will be shown an overview of all the "
                                    f"different scan types and BIDScoin options (as in the "
                                    f"bidseditor) that you can then (re)edit to your needs")

    # Loop over all subjects and sessions and build up the bidsmap entries
    dataformat = ''
    subjects   = bids.lsdirs(rawfolder, subprefix + '*')
    if not subjects:
        LOGGER.warning(f'No subjects found in: {rawfolder/subprefix}*')
        gui = None
    for n, subject in enumerate(subjects,1):

        sessions = bids.lsdirs(subject, sesprefix + '*')
        if not sessions:
            sessions = [subject]
        for session in sessions:

            # Unpack the data in a temporary folder if it is tarballed/zipped and/or contains a DICOMDIR file
            session, unpacked = bids.unpack(session, subprefix, sesprefix)
            if unpacked:
                store = dict(source=unpacked, target=bidscoinfolder/'provenance')
            elif store:
                store = dict(source=rawfolder, target=bidscoinfolder/'provenance')
            else:
                store = dict()

            # Loop over the different DICOM runs (series) and collect source files
            sourcefiles = []
            dataformat  = bids.get_dataformat(session)
            if not dataformat:
                LOGGER.info(f"Skipping: {session} (subject {n}/{len(subjects)})")
                continue

            LOGGER.info(f"Parsing: {session} (subject {n}/{len(subjects)})")

            if dataformat=='DICOM':
                for sourcedir in bids.lsdirs(session):
                    sourcefile = bids.get_dicomfile(sourcedir)
                    if sourcefile.name:
                        sourcefiles.append(sourcefile)

            if dataformat=='PAR':
                sourcefiles = bids.get_parfiles(session)

            if dataformat=='P7':
                sourcefiles = bids.get_p7file(session)

            # Update the bidsmap with the info from the source files
            for sourcefile in sourcefiles:
                bidsmap_new = build_bidsmap(dataformat, sourcefile, bidsmap_new, bidsmap_old, template, store, gui)

            # Update / append the nifti mapping
            if dataformat=='Nifti':
                bidsmap_new = build_niftimap(session, bidsmap_new, bidsmap_old)

            # Update / append the file-system mapping
            if dataformat=='FileSystem':
                bidsmap_new = build_filesystemmap(session, bidsmap_new, bidsmap_old)

            # Update / append the plugin mapping
            if bidsmap_old['PlugIns']:
                bidsmap_new = build_pluginmap(session, bidsmap_new, bidsmap_old)

            # Clean-up the temporary unpacked data
            if unpacked:
                shutil.rmtree(session)

    if not dataformat:
        LOGGER.warning('Could not determine the dataformat of the source data')

    # (Re)launch the bidseditor UI_MainWindow
    bidsmapfile = bidscoinfolder/'bidsmap.yaml'
    if gui:
        if not dataformat:
            QMessageBox.information(mainwin, 'BIDS mapping workflow',
                                    'Could not determine the dataformat of the source data.\n'
                                    'You can try running the bidseditor tool yourself')
        else:
            QMessageBox.information(mainwin, 'BIDS mapping workflow',
                                    f"The bidsmapper has finished scanning {rawfolder}\n\n"
                                    f"Please carefully check all the different BIDS output names "
                                    f"and BIDScoin options and (re)edit them to your needs.\n\n"
                                    f"You can always redo this step later by re-running the "
                                    f"bidsmapper or by just running the bidseditor tool")

            LOGGER.info('Opening the bidseditor')
            gui.setupUi(mainwin, bidsfolder, bidsmapfile, bidsmap_new, copy.deepcopy(bidsmap_new), template, dataformat, subprefix=subprefix, sesprefix=sesprefix)
            mainwin.show()
            app.exec()
    else:
        # Save the bidsmap in the bidscoinfolder
        bids.save_bidsmap(bidsmapfile, bidsmap_new)

    LOGGER.info('-------------- FINISHED! -------------------')
    LOGGER.info('')

    bids.reporterrors()
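Finally, a minimal usage sketch for bidsmapper(), again with made-up paths; passing interactive=False skips the Qt editor, so the new bidsmap is saved directly to bidsfolder/code/bidscoin:

bidsmapper(rawfolder='/data/mystudy/raw',            # Hypothetical source data folder
           bidsfolder='/data/mystudy/bids',
           bidsmapfile='bidsmap.yaml',
           templatefile='bidsmap_template.yaml',
           subprefix='sub-', sesprefix='ses-',
           store=False,
           interactive=False)                        # No GUI: save bidsmap.yaml directly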