Example #1
0
def bidscoiner_plugin(session: Path, bidsmap: dict, bidsses: Path) -> None:
    """
    The bidscoiner plugin to convert the session DICOM and PAR/REC source-files into BIDS-valid nifti-files in the
    corresponding bids session-folder and extract personals (e.g. Age, Sex) from the source header

    For every data source (run) in the session a matching run-item is looked up in the bidsmap, the source data is
    converted (with the configured `command`, or with the physio module for physiological log files), output files that
    got additional dcm2niix postfixes are renamed, the json sidecar-files are updated with the source- and run meta-data
    and the acquisition time is collected. Finally, the scans.tsv file is written and the personals are added to the
    participants.tsv file (unless the subject already has a session_id value in there)

    :param session:     The full-path name of the subject/session source folder
    :param bidsmap:     The full mapping heuristics from the bidsmap YAML-file
    :param bidsses:     The full-path name of the BIDS output `sub-/ses-` folder
    :return:            Nothing
    """

    # Get the subject identifiers and the BIDS root folder from the bidsses folder
    if bidsses.name.startswith('ses-'):
        bidsfolder = bidsses.parent.parent
        subid = bidsses.parent.name
        sesid = bidsses.name
    else:
        bidsfolder = bidsses.parent
        subid = bidsses.name
        sesid = ''

    # Get started and see what dataformat we have
    options = bidsmap['Options']['plugins']['dcm2niix2bids']
    datasource = bids.get_datasource(session, {'dcm2niix2bids': options})
    dataformat = datasource.dataformat
    if not dataformat:
        LOGGER.info(f"No {__name__} sourcedata found in: {session}")
        return

    # Make a list of all the data sources / runs
    manufacturer = 'UNKNOWN'
    sources = []
    if dataformat == 'DICOM':
        sources = bidscoin.lsdirs(session)
        manufacturer = datasource.attributes('Manufacturer')
    elif dataformat == 'PAR':
        sources = bids.get_parfiles(session)
        manufacturer = 'Philips Medical Systems'
    else:
        # NB: No exception is being handled here, so log a plain error (LOGGER.exception would append a bogus traceback)
        LOGGER.error(f"Unsupported dataformat '{dataformat}'")

    # Read or create a scans_table and tsv-file
    scans_tsv = bidsses / f"{subid}{bids.add_prefix('_',sesid)}_scans.tsv"
    if scans_tsv.is_file():
        scans_table = pd.read_csv(scans_tsv, sep='\t', index_col='filename')
    else:
        scans_table = pd.DataFrame(columns=['acq_time'], dtype='str')
        scans_table.index.name = 'filename'

    # The mapping from the dcm2niix mag/phase postfixes to the BIDS `part` entity values
    partlabels = {'ph': 'phase', 'real': 'real', 'imaginary': 'imag'}

    # Process all the source files or run subfolders
    sourcefile = Path()
    for source in sources:

        # Get a sourcefile
        if dataformat == 'DICOM':
            sourcefile = bids.get_dicomfile(source)
        elif dataformat == 'PAR':
            sourcefile = source
        if not sourcefile.name:
            continue

        # Get a matching run from the bidsmap
        datasource = bids.DataSource(sourcefile, {'dcm2niix2bids': options}, dataformat)
        run, match = bids.get_matching_run(datasource, bidsmap, runtime=True)

        # Check if we should ignore this run
        if datasource.datatype in bidsmap['Options']['bidscoin']['ignoretypes']:
            LOGGER.info(f"Leaving out: {source}")
            continue

        # Check if we already know this run
        if not match:
            LOGGER.error(
                f"Skipping unknown '{datasource.datatype}' run: {sourcefile}\n-> Re-run the bidsmapper and delete {bidsses} to solve this warning"
            )
            continue

        LOGGER.info(f"Processing: {source}")

        # Create the BIDS session/datatype output folder
        if run['bids']['suffix'] in bids.get_derivatives(datasource.datatype):
            outfolder = bidsfolder / 'derivatives' / manufacturer.replace(' ', '') / subid / sesid / datasource.datatype
        else:
            outfolder = bidsses / datasource.datatype
        outfolder.mkdir(parents=True, exist_ok=True)

        # Compose the BIDS filename using the matched run
        bidsname = bids.get_bidsname(subid, sesid, run, runtime=True)
        runindex = str(run['bids'].get('run') or '')    # Coerce to str: a static run-index may be stored as an integer in the bidsmap
        dynamicrun = runindex.startswith('<<') and runindex.endswith('>>')
        if dynamicrun:
            bidsname = bids.increment_runindex(outfolder, bidsname)
        jsonfiles = [(outfolder / bidsname).with_suffix('.json')]   # List -> Collect the associated json-files (for updating them later) -- possibly > 1

        # Check if file already exists (-> e.g. when a static runindex is used)
        if (outfolder / bidsname).with_suffix('.json').is_file():
            LOGGER.warning(
                f"{outfolder/bidsname}.* already exists and will be deleted -- check your results carefully!"
            )
            for ext in ('.nii.gz', '.nii', '.json', '.tsv', '.tsv.gz', '.bval', '.bvec'):
                (outfolder / bidsname).with_suffix(ext).unlink(missing_ok=True)

        # Convert physiological log files (dcm2niix can't handle these)
        if run['bids']['suffix'] == 'physio':
            if bids.get_dicomfile(source, 2).name:  # TODO: issue warning or support PAR
                LOGGER.warning(f"Found > 1 DICOM file in {source}, using: {sourcefile}")
            physiodata = physio.readphysio(sourcefile)
            physio.physio2tsv(physiodata, outfolder / bidsname)

        # Convert the source-files in the run folder to nifti's in the BIDS-folder
        else:
            command = '{command} {args} -f "{filename}" -o "{outfolder}" "{source}"'.format(
                command=options['command'],
                args=options.get('args', ''),
                filename=bidsname,
                outfolder=outfolder,
                source=source)
            if not bidscoin.run_command(command):
                # Only give up on this run if the failed command did not produce any nifti output at all
                if not list(outfolder.glob(f"{bidsname}.nii*")):
                    continue
            if list(outfolder.glob(f"{bidsname}a.nii*")):
                LOGGER.warning(
                    f"Unexpected variants of {outfolder/bidsname}* were produced by dcm2niix. Possibly this can be remedied by using the dcm2niix -i option (to ignore derived, localizer and 2D images)"
                )

            # Replace uncropped output image with the cropped one
            if '-x y' in options.get('args', ''):
                for dcm2niixfile in sorted(outfolder.glob(bidsname + '*_Crop_*')):  # e.g. *_Crop_1.nii.gz
                    ext = ''.join(dcm2niixfile.suffixes)
                    newbidsfile = str(dcm2niixfile).rsplit(ext, 1)[0].rsplit('_Crop_', 1)[0] + ext
                    LOGGER.info(
                        f"Found dcm2niix _Crop_ postfix, replacing original file\n{dcm2niixfile} ->\n{newbidsfile}"
                    )
                    dcm2niixfile.replace(newbidsfile)

            # Rename all files that got additional postfixes from dcm2niix. See: https://github.com/rordenlab/dcm2niix/blob/master/FILENAMING.md
            dcm2niixpostfixes = ('_c', '_i', '_Eq', '_real', '_imaginary', '_MoCo', '_t', '_Tilt', '_e', '_ph', '_ADC',
                                 '_fieldmaphz')
            dcm2niixfiles = sorted({dcm2niixfile for dcm2niixpostfix in dcm2niixpostfixes
                                    for dcm2niixfile in outfolder.glob(f"{bidsname}*{dcm2niixpostfix}*.nii*")})
            if not jsonfiles[0].is_file() and dcm2niixfiles:    # Possibly renamed by dcm2niix, e.g. with multi-echo data (but not always for the first echo)
                jsonfiles.pop(0)
            for dcm2niixfile in dcm2niixfiles:
                ext = ''.join(dcm2niixfile.suffixes)
                postfixes = str(dcm2niixfile).split(bidsname)[1].rsplit(ext)[0].split('_')[1:]
                newbidsname = dcm2niixfile.name     # Strip the additional postfixes and assign them to bids entities in the for-loop below
                for postfix in postfixes:           # dcm2niix postfixes _c%d, _e%d and _ph (and any combination of these in that order) are for multi-coil data, multi-echo data and phase data

                    # Patch the echo entity in the newbidsname with the dcm2niix echo info
                    # NB: We can't rely on the bids-entity info here because manufacturers can e.g. put multiple echos in one series / run-folder
                    if 'echo' in run['bids'] and postfix.startswith('e'):
                        echonr = f"_{postfix}".replace('_e', '')    # E.g. postfix='e1'
                        if not echonr:
                            echonr = '1'
                        if echonr.isnumeric():
                            # In contrast to other labels, run and echo labels MUST be integers. Those labels MAY include zero padding, but this is NOT RECOMMENDED to maintain their uniqueness
                            newbidsname = bids.insert_bidskeyval(newbidsname, 'echo', echonr.lstrip('0'))
                        else:
                            LOGGER.error(
                                f"Unexpected postix '{postfix}' found in {dcm2niixfile}"
                            )
                            newbidsname = bids.get_bidsvalue(newbidsname, 'dummy', postfix)     # Append the unknown postfix to the acq-label

                    # Patch the phase entity in the newbidsname with the dcm2niix mag/phase info
                    elif 'part' in run['bids'] and postfix in partlabels:   # e.g. part: ['', 'mag', 'phase', 'real', 'imag', 0]
                        newbidsname = bids.insert_bidskeyval(newbidsname, 'part', partlabels[postfix])

                    # Patch fieldmap images (NB: datatype=='fmap' is too broad, see the fmap.yaml file)
                    # NB: The order of the replacements below matters, do not reshuffle them
                    elif run['bids']['suffix'] in bids.bidsdatatypes['fmap'][0]['suffixes']:    # i.e. in ('magnitude','magnitude1','magnitude2','phase1','phase2','phasediff','fieldmap'). TODO: Make this robust for future BIDS versions
                        if len(dcm2niixfiles) not in (1, 2, 3, 4):      # Phase / echo data may be stored in the same data source / run folder
                            LOGGER.debug(
                                f"Unknown fieldmap {outfolder/bidsname} for '{postfix}'"
                            )
                        newbidsname = newbidsname.replace('_magnitude1a', '_magnitude2')        # First catch this potential weird / rare case
                        newbidsname = newbidsname.replace('_magnitude1_pha', '_phase2')         # First catch this potential weird / rare case
                        newbidsname = newbidsname.replace('_magnitude1_e1', '_magnitude1')      # Case 2 = Two phase and magnitude images
                        newbidsname = newbidsname.replace('_magnitude1_e2', '_magnitude2')      # Case 2: This can happen when the e2 image is stored in the same directory as the e1 image, but with the e2 listed first
                        newbidsname = newbidsname.replace('_magnitude2_e1', '_magnitude1')      # Case 2: This can happen when the e2 image is stored in the same directory as the e1 image, but with the e2 listed first
                        newbidsname = newbidsname.replace('_magnitude2_e2', '_magnitude2')      # Case 2
                        if len(dcm2niixfiles) in (2, 3):                                        # Case 1 = One or two magnitude + one phasediff image
                            newbidsname = newbidsname.replace('_magnitude1_ph', '_phasediff')
                            newbidsname = newbidsname.replace('_magnitude2_ph', '_phasediff')
                        newbidsname = newbidsname.replace('_phasediff_e1', '_phasediff')        # Case 1
                        newbidsname = newbidsname.replace('_phasediff_e2', '_phasediff')        # Case 1
                        newbidsname = newbidsname.replace('_phasediff_ph', '_phasediff')        # Case 1
                        newbidsname = newbidsname.replace('_magnitude1_ph', '_phase1')          # Case 2: One or two magnitude and phase images in one folder / datasource
                        newbidsname = newbidsname.replace('_magnitude2_ph', '_phase2')          # Case 2: Two magnitude + two phase images in one folder / datasource
                        newbidsname = newbidsname.replace('_phase1_e1', '_phase1')              # Case 2
                        newbidsname = newbidsname.replace('_phase1_e2', '_phase2')              # Case 2: This can happen when the e2 image is stored in the same directory as the e1 image, but with the e2 listed first
                        newbidsname = newbidsname.replace('_phase2_e1', '_phase1')              # Case 2: This can happen when the e2 image is stored in the same directory as the e1 image, but with the e2 listed first
                        newbidsname = newbidsname.replace('_phase2_e2', '_phase2')              # Case 2
                        newbidsname = newbidsname.replace('_phase1_ph', '_phase1')              # Case 2: One or two magnitude and phase images in one folder / datasource
                        newbidsname = newbidsname.replace('_phase2_ph', '_phase2')              # Case 2: Two magnitude + two phase images in one folder / datasource
                        newbidsname = newbidsname.replace('_magnitude_e1', '_magnitude')        # Case 3 = One magnitude + one fieldmap image
                        if len(dcm2niixfiles) == 2:
                            newbidsname = newbidsname.replace('_fieldmap_e1', '_magnitude')     # Case 3: One magnitude + one fieldmap image in one folder / datasource
                        newbidsname = newbidsname.replace('_fieldmap_e1', '_fieldmap')          # Case 3
                        newbidsname = newbidsname.replace('_magnitude_ph', '_fieldmap')         # Case 3: One magnitude + one fieldmap image in one folder / datasource
                        newbidsname = newbidsname.replace('_fieldmap_ph', '_fieldmap')          # Case 3

                    # Append the dcm2niix info to acq-label, may need to be improved / elaborated for future BIDS standards, supporting multi-coil data
                    else:
                        newbidsname = bids.get_bidsvalue(newbidsname, 'dummy', postfix)

                    # Remove the added postfix from the new bidsname
                    newbidsname = newbidsname.replace(f"_{postfix}_", '_')  # If it is not last
                    newbidsname = newbidsname.replace(f"_{postfix}.", '.')  # If it is last

                    # The ADC images are not BIDS compliant
                    if postfix == 'ADC':
                        LOGGER.warning(
                            f"The {newbidsname} image is most likely not BIDS-compliant -- you can probably delete it safely and update the scans.tsv file"
                        )

                # Save the nifti file with a new name
                if dynamicrun:
                    newbidsname = bids.increment_runindex(outfolder, newbidsname, '')   # Update the runindex now that the acq-label has changed
                newbidsfile = outfolder / newbidsname
                LOGGER.info(
                    f"Found dcm2niix {postfixes} postfixes, renaming\n{dcm2niixfile} ->\n{newbidsfile}"
                )
                if newbidsfile.is_file():
                    LOGGER.warning(
                        f"Overwriting existing {newbidsfile} file -- check your results carefully!"
                    )
                dcm2niixfile.replace(newbidsfile)

                # Rename all associated files (i.e. the json-, bval- and bvec-files)
                oldjsonfile = dcm2niixfile.with_suffix('').with_suffix('.json')
                newjsonfile = newbidsfile.with_suffix('').with_suffix('.json')
                if not oldjsonfile.is_file():
                    LOGGER.warning(
                        f"Unexpected file conversion result: {oldjsonfile} not found"
                    )
                else:
                    if oldjsonfile in jsonfiles:
                        jsonfiles.remove(oldjsonfile)
                    if newjsonfile not in jsonfiles:
                        jsonfiles.append(newjsonfile)
                for oldfile in outfolder.glob(dcm2niixfile.with_suffix('').stem + '.*'):
                    oldfile.replace(newjsonfile.with_suffix(''.join(oldfile.suffixes)))

        # Copy over the source meta-data
        metadata = bids.copymetadata(sourcefile, outfolder / bidsname, options.get('meta', []))

        # Loop over and adapt all the newly produced json sidecar-files and write to the scans.tsv file (NB: assumes every nifti-file comes with a json-file)
        for jsonfile in sorted(set(jsonfiles)):

            # Load the json meta-data
            with jsonfile.open('r') as json_fid:
                jsondata = json.load(json_fid)

            # Add all the source meta data to the meta-data (only warn if an existing value is really being changed)
            for metakey, metaval in metadata.items():
                if metakey in jsondata and jsondata[metakey] != metaval:
                    LOGGER.warning(
                        f"Replacing {metakey} values in {jsonfile}: {jsondata[metakey]} -> {metaval}"
                    )
                jsondata[metakey] = metaval

            # Add all the run meta data to the meta-data. NB: the dynamic `IntendedFor` value is handled separately later
            for metakey, metaval in run['meta'].items():
                if metakey != 'IntendedFor':
                    metaval = datasource.dynamicvalue(metaval, cleanup=False, runtime=True)
                    try:
                        metaval = ast.literal_eval(str(metaval))    # E.g. convert stringified lists or numbers back to python objects
                    except (ValueError, SyntaxError):
                        pass                                        # Not a python literal -> keep the value as it is
                    LOGGER.info(
                        f"Adding '{metakey}: {metaval}' to: {jsonfile}")
                if not metaval:
                    metaval = None
                jsondata[metakey] = metaval

            # Remove unused (but added from the template) B0FieldIdentifiers/Sources
            if not jsondata.get('B0FieldSource'):
                jsondata.pop('B0FieldSource', None)
            if not jsondata.get('B0FieldIdentifier'):
                jsondata.pop('B0FieldIdentifier', None)

            # Save the meta-data to the json sidecar-file
            with jsonfile.open('w') as json_fid:
                json.dump(jsondata, json_fid, indent=4)

            # Parse the acquisition time from the source header or else from the json file (NB: assuming the source file represents the first acquisition)
            # Find the corresponding nifti/tsv.gz file (there should be only one, let's not make assumptions about the .gz extension)
            outputfile = [file for file in jsonfile.parent.glob(jsonfile.stem + '.*') if file.suffix in ('.nii', '.gz')]
            if not outputfile:
                # NB: No exception is being handled here, so log a plain error instead of using LOGGER.exception
                LOGGER.error(
                    f"No data-file found with {jsonfile} when updating {scans_tsv}"
                )
            elif datasource.datatype not in bidsmap['Options']['bidscoin']['bidsignore'] and \
                    run['bids']['suffix'] not in bids.get_derivatives(datasource.datatype):
                acq_time = ''
                if dataformat == 'DICOM':
                    acq_time = f"{datasource.attributes('AcquisitionDate')}T{datasource.attributes('AcquisitionTime')}"
                elif dataformat == 'PAR':
                    acq_time = datasource.attributes('exam_date')
                if not acq_time or acq_time == 'T':
                    acq_time = f"1925-01-01T{jsondata.get('AcquisitionTime','')}"
                try:
                    acq_time = dateutil.parser.parse(acq_time)
                    if options.get('anon', 'y') in ('y', 'yes'):
                        acq_time = acq_time.replace(year=1925, month=1, day=1)  # Privacy protection (see BIDS specification)
                    acq_time = acq_time.isoformat()
                except Exception as jsonerror:      # Best effort: any parsing failure results in an 'n/a' acquisition time
                    LOGGER.warning(
                        f"Could not parse the acquisition time from: {sourcefile}\n{jsonerror}"
                    )
                    acq_time = 'n/a'
                scanpath = outputfile[0].relative_to(bidsses)
                scans_table.loc[scanpath.as_posix(), 'acq_time'] = acq_time

    # Write the scans_table to disk
    LOGGER.info(f"Writing acquisition time data to: {scans_tsv}")
    scans_table.sort_values(by=['acq_time', 'filename'], inplace=True)
    scans_table.replace('', 'n/a').to_csv(scans_tsv,
                                          sep='\t',
                                          encoding='utf-8',
                                          na_rep='n/a')

    # Collect personal data from a source header (PAR/XML does not contain personal info)
    # NB: `datasource` is the last one created in the loop above (or the session-level one if no source was processed)
    personals = {}
    if sesid:
        personals['session_id'] = sesid
    personals['age'] = ''
    if dataformat == 'DICOM':
        age = datasource.attributes('PatientAge')   # A string of characters with one of the following formats: nnnD, nnnW, nnnM, nnnY
        try:
            # Convert the age to years, using the mean Gregorian year of 365.2425 days (= 52.1775 weeks)
            if age.endswith('D'):
                age = float(age.rstrip('D')) / 365.2425
            elif age.endswith('W'):
                age = float(age.rstrip('W')) / 52.1775
            elif age.endswith('M'):
                age = float(age.rstrip('M')) / 12
            elif age.endswith('Y'):
                age = float(age.rstrip('Y'))
            if age:
                if options.get('anon', 'y') in ('y', 'yes'):
                    age = int(float(age))           # Privacy protection: only store the age in whole years
                personals['age'] = str(age)
        except ValueError:
            # A malformed age must not abort the session after the conversion work is done -> leave the age empty
            LOGGER.warning(f"Could not parse the PatientAge '{age}' for: {session}")
        personals['sex'] = datasource.attributes('PatientSex')
        personals['size'] = datasource.attributes('PatientSize')
        personals['weight'] = datasource.attributes('PatientWeight')

    # Store the collected personals in the participants_table
    participants_tsv = bidsfolder / 'participants.tsv'
    if participants_tsv.is_file():
        participants_table = pd.read_csv(participants_tsv, sep='\t', dtype=str)
        participants_table.set_index(['participant_id'],
                                     verify_integrity=True,
                                     inplace=True)
    else:
        participants_table = pd.DataFrame()
        participants_table.index.name = 'participant_id'
    if subid in participants_table.index and 'session_id' in participants_table.keys() \
            and participants_table.loc[subid, 'session_id']:
        return  # Only take data from the first session -> BIDS specification
    for key in personals:  # TODO: Check that only values that are consistent over sessions go in the participants.tsv file, otherwise put them in a sessions.tsv file
        # Only fill in values that are still missing (absent column, null cell or 'n/a') for this subject
        if key not in participants_table or participants_table[key].isnull().get(subid, True) \
                or participants_table[key].get(subid) == 'n/a':
            participants_table.loc[subid, key] = personals[key]

    # Write the collected data to the participants tsv-file
    LOGGER.info(f"Writing {subid} subject data to: {participants_tsv}")
    participants_table.replace('', 'n/a').to_csv(participants_tsv,
                                                 sep='\t',
                                                 encoding='utf-8',
                                                 na_rep='n/a')
Example #2
0
def coin_data2bids(dataformat: str, session: Path, bidsmap: dict,
                   bidsfolder: Path, personals: dict, subprefix: str,
                   sesprefix: str) -> None:
    """
    Converts the session source-files into BIDS-valid nifti-files in the corresponding bidsfolder and
    extracts personals (e.g. Age, Sex) from the source header

    :param dataformat:  The format of the raw input data that is to be coined (e.g. 'DICOM' or 'PAR', see bids.get_dataformat)
    :param session:     The full-path name of the subject/session source file/folder
    :param bidsmap:     The full mapping heuristics from the bidsmap YAML-file
    :param bidsfolder:  The full-path name of the BIDS root-folder
    :param personals:   The dictionary with the personal information
    :param subprefix:   The prefix common for all source subject-folders
    :param sesprefix:   The prefix common for all source session-folders
    :return:            Nothing
    """

    # Get valid BIDS subject/session identifiers from the (first) DICOM- or PAR/XML source file
    if dataformat == 'DICOM':
        sourcefile = Path()
        sources = bids.lsdirs(session)
        for source in sources:
            sourcefile = bids.get_dicomfile(source)
            manufacturer = bids.get_dicomfield('Manufacturer', sourcefile)
            if sourcefile.name:
                break

    elif dataformat == 'PAR':
        sources = bids.get_parfiles(session)
        manufacturer = 'Philips Medical Systems'
        if sources:
            sourcefile = sources[0]

    else:
        LOGGER.error(
            f"Unsupported data format: {dataformat}\nPlease report this bug")
        return

    if not sources:
        LOGGER.info(f"No data found for: {session}")
        return

    subid, sesid = bids.get_subid_sesid(sourcefile,
                                        bidsmap[dataformat]['subject'],
                                        bidsmap[dataformat]['session'],
                                        subprefix, sesprefix)

    if subid == subprefix:
        LOGGER.error(f"No valid subject identifier found for: {session}")
        return

    # Create the BIDS session-folder and a scans.tsv file
    bidsses = bidsfolder / subid / sesid
    if bidsses.is_dir():
        LOGGER.warning(
            f"Existing BIDS output-directory found, which may result in duplicate data (with increased run-index). Make sure {bidsses} was cleaned-up from old data before (re)running the bidscoiner"
        )
    bidsses.mkdir(parents=True, exist_ok=True)
    scans_tsv = bidsses / f"{subid}{bids.add_prefix('_',sesid)}_scans.tsv"
    if scans_tsv.is_file():
        scans_table = pd.read_csv(scans_tsv, sep='\t', index_col='filename')
    else:
        scans_table = pd.DataFrame(columns=['acq_time'], dtype='str')
        scans_table.index.name = 'filename'

    # Process all the source files or run subfolders
    for source in sources:

        # Get a source-file
        if dataformat == 'DICOM':
            sourcefile = bids.get_dicomfile(source)
        elif dataformat == 'PAR':
            sourcefile = source
        if not sourcefile.name:
            continue

        # Get a matching run from the bidsmap
        run, datatype, index = bids.get_matching_run(sourcefile, bidsmap,
                                                     dataformat)

        # Check if we should ignore this run
        if datatype == bids.ignoredatatype:
            LOGGER.info(f"Leaving out: {source}")
            continue

        # Check if we already know this run
        if index is None:
            LOGGER.error(
                f"Skipping unknown '{datatype}' run: {sourcefile}\n-> Re-run the bidsmapper and delete {bidsses} to solve this warning"
            )
            continue

        LOGGER.info(f"Processing: {source}")

        # Create the BIDS session/datatype output folder
        if run['bids']['suffix'] in bids.get_derivatives(datatype):
            outfolder = bidsfolder / 'derivatives' / manufacturer.replace(
                ' ', '') / subid / sesid / datatype
        else:
            outfolder = bidsses / datatype
        outfolder.mkdir(parents=True, exist_ok=True)

        # Compose the BIDS filename using the matched run
        bidsname = bids.get_bidsname(subid, sesid, run)
        runindex = run['bids'].get('run', '')
        if runindex.startswith('<<') and runindex.endswith('>>'):
            bidsname = bids.increment_runindex(outfolder, bidsname)
        jsonfiles = [
            (outfolder / bidsname).with_suffix('.json')
        ]  # List -> Collect the associated json-files (for updating them later) -- possibly > 1

        # Check if file already exists (-> e.g. when a static runindex is used)
        if (outfolder / bidsname).with_suffix('.json').is_file():
            LOGGER.warning(
                f"{outfolder/bidsname}.* already exists and will be deleted -- check your results carefully!"
            )
            for ext in ('.nii.gz', '.nii', '.json', '.bval', '.bvec',
                        'tsv.gz'):
                (outfolder / bidsname).with_suffix(ext).unlink(missing_ok=True)

        # Convert physiological log files (dcm2niix can't handle these)
        if run['bids']['suffix'] == 'physio':
            if bids.get_dicomfile(source, 2).name:
                LOGGER.warning(
                    f"Found > 1 DICOM file in {source}, using: {sourcefile}")
            physiodata = physio.readphysio(sourcefile)
            physio.physio2tsv(physiodata, outfolder / bidsname)

        # Convert the source-files in the run folder to nifti's in the BIDS-folder
        else:
            command = '{path}dcm2niix {args} -f "{filename}" -o "{outfolder}" "{source}"'.format(
                path=bidsmap['Options']['dcm2niix']['path'],
                args=bidsmap['Options']['dcm2niix']['args'],
                filename=bidsname,
                outfolder=outfolder,
                source=source)
            if not bids.run_command(command):
                continue

            # Replace uncropped output image with the cropped one
            if '-x y' in bidsmap['Options']['dcm2niix']['args']:
                for dcm2niixfile in sorted(
                        outfolder.glob(bidsname +
                                       '*_Crop_*')):  # e.g. *_Crop_1.nii.gz
                    ext = ''.join(dcm2niixfile.suffixes)
                    newbidsfile = str(dcm2niixfile).rsplit(ext, 1)[0].rsplit(
                        '_Crop_', 1)[0] + ext
                    LOGGER.info(
                        f"Found dcm2niix _Crop_ postfix, replacing original file\n{dcm2niixfile} ->\n{newbidsfile}"
                    )
                    dcm2niixfile.replace(newbidsfile)

            # Rename all files that got additional postfixes from dcm2niix. See: https://github.com/rordenlab/dcm2niix/blob/master/FILENAMING.md
            dcm2niixpostfixes = ('_c', '_i', '_Eq', '_real', '_imaginary',
                                 '_MoCo', '_t', '_Tilt', '_e', '_ph')
            dcm2niixfiles = sorted(
                set([
                    dcm2niixfile for dcm2niixpostfix in dcm2niixpostfixes
                    for dcm2niixfile in outfolder.glob(
                        f"{bidsname}*{dcm2niixpostfix}*")
                ]))
            for dcm2niixfile in dcm2niixfiles:
                ext = ''.join(dcm2niixfile.suffixes)
                postfixes = str(dcm2niixfile).split(bidsname)[1].rsplit(
                    ext)[0].split('_')[1:]
                newbidsname = dcm2niixfile.name  # Strip the additional postfixes and assign them to bids entities in the for-loop below
                for postfix in postfixes:  # dcm2niix postfixes _c%d, _e%d and _ph (and any combination of these in that order) are for multi-coil data, multi-echo data and phase data

                    # Patch the echo entity in the newbidsname with the dcm2niix echo info                      # NB: We can't rely on the bids-entity info here because manufacturers can e.g. put multiple echos in one series / run-folder
                    if postfix[0] == 'e' and bids.get_bidsvalue(
                            newbidsname, 'echo'
                    ):  # NB: Check if postfix[0]=='e' uniquely refers to the right dcm2niixpostfix
                        echonr = f"_{postfix}"  # E.g. echonr='_e1' or echonr='_pha'
                        for dcm2niixpostfix in dcm2niixpostfixes:
                            echonr = echonr.replace(
                                dcm2niixpostfix, ''
                            )  # Strip the dcm2niixpostfix to keep the echonr info. E.g. [echonr='_e1' or echonr='_pha'] -> [echonr='1' or echonr='a']
                        if echonr.isalpha():
                            echonr = ord(
                                echonr
                            ) - 95  # dcm2niix adds an alphabetically ordered character if it outputs more than one image with the same name. Convert character to echo-number: '' -> 1, 'a'->2, etc
                        elif not echonr:
                            echonr = 1
                        newbidsname = bids.get_bidsvalue(
                            newbidsname, 'echo', str(echonr)
                        )  # In contrast to other labels, run and echo labels MUST be integers. Those labels MAY include zero padding, but this is NOT RECOMMENDED to maintain their uniqueness

                    # Patch fieldmap images (NB: datatype=='fmap' is too broad, see the fmap.yaml file)
                    elif run['bids']['suffix'] in ('magnitude', 'magnitude1',
                                                   'magnitude2', 'phase1',
                                                   'phase2', 'phasediff',
                                                   'fieldmap'):
                        if len(dcm2niixfiles) not in (
                                0, 2, 4, 6, 8
                        ):  # Phase / echo data may be stored in the same data source / run folder
                            LOGGER.warning(
                                f"Unknown fieldmap {outfolder/bidsname} for '{postfix}'"
                            )
                        newbidsname = newbidsname.replace(
                            '_fieldmap_ph', '_fieldmap')
                        newbidsname = newbidsname.replace(
                            '_magnitude_e1', '_magnitude')
                        newbidsname = newbidsname.replace(
                            '_magnitude_ph', '_fieldmap')
                        newbidsname = newbidsname.replace(
                            '_magnitude1_e1', '_magnitude1')
                        newbidsname = newbidsname.replace(
                            '_magnitude2_e1', '_magnitude1'
                        )  # This can happen when the e2 image is stored in the same directory as the e1 image, but with the e2 listed first
                        newbidsname = newbidsname.replace(
                            '_magnitude1_e2', '_magnitude2')
                        if len(dcm2niixfiles) == 8:
                            newbidsname = newbidsname.replace(
                                '_magnitude1_ph', '_phase1'
                            )  # Two magnitude + 2 phase images in one folder / datasource
                        else:
                            newbidsname = newbidsname.replace(
                                '_magnitude1_ph', '_phasediff'
                            )  # One or two magnitude + 1 phasediff image
                        newbidsname = newbidsname.replace(
                            '_magnitude1a', '_magnitude2')
                        newbidsname = newbidsname.replace(
                            '_magnitude1_pha', '_phase2')
                        newbidsname = newbidsname.replace(
                            '_magnitude2_e2', '_magnitude2')
                        newbidsname = newbidsname.replace(
                            '_magnitude2_ph', '_phase2')
                        newbidsname = newbidsname.replace(
                            '_phase1_e1', '_phase1')
                        newbidsname = newbidsname.replace(
                            '_phase2_e1', '_phase1'
                        )  # This can happen when the e2 image is stored in the same directory as the e1 image, but with the e2 listed first
                        newbidsname = newbidsname.replace(
                            '_phase1_ph', '_phase1')
                        newbidsname = newbidsname.replace(
                            '_phase1_e2', '_phase2')
                        newbidsname = newbidsname.replace(
                            '_phase2_e2', '_phase2')
                        newbidsname = newbidsname.replace(
                            '_phase2_ph', '_phase2')

                    # Append the dcm2niix info to acq-label, may need to be improved / elaborated for future BIDS standards, supporting multi-coil data
                    else:
                        newbidsname = bids.get_bidsvalue(
                            newbidsname, 'dummy', postfix)

                    # Remove the added postfix from the new bidsname
                    newbidsname = newbidsname.replace(f"_{postfix}_",
                                                      '_')  # If it is not last
                    newbidsname = newbidsname.replace(f"_{postfix}.",
                                                      '.')  # If it is last

                # Save the file with a new name
                if runindex.startswith('<<') and runindex.endswith('>>'):
                    newbidsname = bids.increment_runindex(
                        outfolder, newbidsname, ''
                    )  # Update the runindex now that the acq-label has changed
                newbidsfile = outfolder / newbidsname
                LOGGER.info(
                    f"Found dcm2niix {postfixes} postfixes, renaming\n{dcm2niixfile} ->\n{newbidsfile}"
                )
                if newbidsfile.is_file():
                    LOGGER.warning(
                        f"Overwriting existing {newbidsfile} file -- check your results carefully!"
                    )
                dcm2niixfile.replace(newbidsfile)
                if ext == '.json':
                    oldjsonfile = (outfolder / bidsname).with_suffix('.json')
                    if oldjsonfile in jsonfiles and not oldjsonfile.is_file():
                        jsonfiles.remove(
                            (outfolder / bidsname).with_suffix('.json'))
                    jsonfiles.append(newbidsfile)

        # Loop over and adapt all the newly produced json files and write to the scans.tsv file (every nifti-file comes with a json-file)
        for jsonfile in sorted(set(jsonfiles)):

            # Check if dcm2niix behaved as expected
            if not jsonfile.is_file():
                LOGGER.error(
                    f"Unexpected file conversion result: {jsonfile} not found")
                continue

            # Add a dummy b0 bval- and bvec-file for any file without a bval/bvec file (e.g. sbref, b0 scans)
            if datatype == 'dwi':
                bvecfile = jsonfile.with_suffix('.bvec')
                bvalfile = jsonfile.with_suffix('.bval')
                if not bvecfile.is_file():
                    LOGGER.info(f"Adding dummy bvec file: {bvecfile}")
                    with bvecfile.open('w') as bvec_fid:
                        bvec_fid.write('0\n0\n0\n')
                if not bvalfile.is_file():
                    LOGGER.info(f"Adding dummy bval file: {bvalfile}")
                    with bvalfile.open('w') as bval_fid:
                        bval_fid.write('0\n')

            # Add the TaskName to the func json-file
            elif datatype == 'func':
                with jsonfile.open('r') as json_fid:
                    data = json.load(json_fid)
                if not 'TaskName' in data:
                    LOGGER.info(f"Adding TaskName to: {jsonfile}")
                    data['TaskName'] = run['bids']['task']
                    with jsonfile.open('w') as json_fid:
                        json.dump(data, json_fid, indent=4)

            # Parse the acquisition time from the json file or else from the source header (NB: assuming the source file represents the first acquisition)
            niifile = list(
                jsonfile.parent.glob(jsonfile.stem + '.nii*')
            )  # Find the corresponding nifti file (there should be only one, let's not make assumptions about the .gz extension)
            if niifile and datatype not in bidsmap['Options']['bidscoin'][
                    'bidsignore'] and not run['bids'][
                        'suffix'] in bids.get_derivatives(datatype):
                with jsonfile.open('r') as json_fid:
                    data = json.load(json_fid)
                if 'AcquisitionTime' not in data or not data['AcquisitionTime']:
                    data['AcquisitionTime'] = bids.get_sourcefield(
                        'AcquisitionTime', sourcefile)  # DICOM
                if not data['AcquisitionTime']:
                    data['AcquisitionTime'] = bids.get_sourcefield(
                        'exam_date', sourcefile)  # PAR/XML
                try:
                    acq_time = dateutil.parser.parse(data['AcquisitionTime'])
                except:
                    LOGGER.warning(
                        f"Could not parse the acquisition time from: '{data['AcquisitionTime']}' in {sourcefile}"
                    )
                    acq_time = dateutil.parser.parse('00:00:00')
                scanpath = niifile[0].relative_to(bidsses)
                scans_table.loc[
                    scanpath.as_posix(),
                    'acq_time'] = '1925-01-01T' + acq_time.strftime('%H:%M:%S')

    # Write the scans_table to disk
    LOGGER.info(f"Writing acquisition time data to: {scans_tsv}")
    scans_table.sort_values(by=['acq_time', 'filename'], inplace=True)
    scans_table.to_csv(scans_tsv, sep='\t', encoding='utf-8')

    # Add IntendedFor and TE1+TE2 meta-data to the fieldmap json-files. This has been postponed untill all datatypes have been processed (i.e. so that all target images are indeed on disk)
    if bidsmap[dataformat]['fmap'] is not None:
        for fieldmap in bidsmap[dataformat]['fmap']:
            bidsname = bids.get_bidsname(subid, sesid, fieldmap)
            niifiles = []
            intendedfor = fieldmap['bids']['IntendedFor']

            # Search for the imaging files that match the IntendedFor search criteria
            if intendedfor:
                if intendedfor.startswith('<<') and intendedfor.endswith('>>'):
                    intendedfor = intendedfor[2:-2].split('><')
                elif not isinstance(intendedfor, list):
                    intendedfor = [intendedfor]
                for selector in intendedfor:
                    niifiles.extend(
                        [
                            Path(niifile).relative_to(bidsfolder / subid)
                            for niifile in sorted(
                                bidsses.rglob(f"*{selector}*.nii*"))
                            if selector
                        ]
                    )  # Search in all runs using a relative path to the subject folder
            else:
                intendedfor = []

            # Get the set of json-files (account for multiple runs in one data source and dcm2niix postfixes inserted into the acquisition label)
            jsonfiles = []
            acqlabel = bids.get_bidsvalue(bidsname, 'acq')
            patterns = (bidsname.replace('_run-1_', '_run-[0-9]*_').replace(
                '_magnitude1',
                '_magnitude*').replace('_magnitude2', '_magnitude*').replace(
                    '_phase1', '_phase*').replace('_phase2', '_phase*'),
                        bidsname.replace('_run-1_', '_run-[0-9]*_').replace(
                            '_magnitude1',
                            '_phase*').replace('_magnitude2', '_phase*'))
            for pattern in patterns:
                jsonfiles.extend((bidsses / 'fmap').glob(pattern + '.json'))
                if acqlabel:
                    cepattern = bids.get_bidsvalue(pattern, 'acq',
                                                   acqlabel + '[CE][0-9]*')
                    jsonfiles.extend(
                        list((bidsses / 'fmap').glob(cepattern + '.json')))

            # Save the meta-data in the jsonfiles
            for jsonfile in sorted(set(jsonfiles)):

                # Add the IntendedFor data
                with jsonfile.open('r') as json_fid:
                    data = json.load(json_fid)
                if 'IntendedFor' not in data:
                    if niifiles:
                        LOGGER.info(f"Adding IntendedFor to: {jsonfile}")
                    elif intendedfor:
                        LOGGER.warning(
                            f"Empty 'IntendedFor' fieldmap value in {jsonfile}: the search for {intendedfor} gave no results"
                        )
                    else:
                        LOGGER.warning(
                            f"Empty 'IntendedFor' fieldmap value in {jsonfile}: the IntendedFor value of the bidsmap entry was empty"
                        )
                    data['IntendedFor'] = [
                        niifile.as_posix() for niifile in niifiles
                    ]  # The path needs to use forward slashes instead of backward slashes
                    with jsonfile.open('w') as json_fid:
                        json.dump(data, json_fid, indent=4)

                # Extract the echo times from magnitude1 and magnitude2 and add them to the phasediff json-file
                if jsonfile.name.endswith('phasediff.json'):
                    json_magnitude = [None, None]
                    TE = [None, None]
                    for n in (0, 1):
                        json_magnitude[
                            n] = jsonfile.parent / jsonfile.name.replace(
                                '_phasediff', f"_magnitude{n+1}")
                        if not json_magnitude[n].is_file():
                            LOGGER.error(
                                f"Could not find expected magnitude{n+1} image associated with: {jsonfile}"
                            )
                        else:
                            with json_magnitude[n].open('r') as json_fid:
                                data = json.load(json_fid)
                            TE[n] = data['EchoTime']
                    if None in TE:
                        LOGGER.error(
                            f"Cannot find and add valid EchoTime1={TE[0]} and EchoTime2={TE[1]} data to: {jsonfile}"
                        )
                    elif TE[0] > TE[1]:
                        LOGGER.error(
                            f"Found invalid EchoTime1={TE[0]} > EchoTime2={TE[1]} for: {jsonfile}"
                        )
                    else:
                        with jsonfile.open('r') as json_fid:
                            data = json.load(json_fid)
                        data['EchoTime1'] = TE[0]
                        data['EchoTime2'] = TE[1]
                        LOGGER.info(
                            f"Adding EchoTime1: {TE[0]} and EchoTime2: {TE[1]} to {jsonfile}"
                        )
                        with jsonfile.open('w') as json_fid:
                            json.dump(data, json_fid, indent=4)

    # Collect personal data from a source header (PAR/XML does not contain personal info)
    if dataformat == 'DICOM' and sourcefile.name:
        personals['participant_id'] = subid
        if sesid:
            if 'session_id' not in personals:
                personals['session_id'] = sesid
            else:
                return  # Only take data from the first session -> BIDS specification
        age = bids.get_dicomfield(
            'PatientAge', sourcefile
        )  # A string of characters with one of the following formats: nnnD, nnnW, nnnM, nnnY
        if age.endswith('D'):
            personals['age'] = str(int(float(age.rstrip('D')) / 365.2524))
        elif age.endswith('W'):
            personals['age'] = str(int(float(age.rstrip('W')) / 52.1775))
        elif age.endswith('M'):
            personals['age'] = str(int(float(age.rstrip('M')) / 12))
        elif age.endswith('Y'):
            personals['age'] = str(int(float(age.rstrip('Y'))))
        elif age:
            personals['age'] = age
        personals['sex'] = bids.get_dicomfield('PatientSex', sourcefile)
        personals['size'] = bids.get_dicomfield('PatientSize', sourcefile)
        personals['weight'] = bids.get_dicomfield('PatientWeight', sourcefile)
Example #3
0
def addmetadata(bidsses: Path, subid: str, sesid: str) -> None:
    """
    Adds the special fieldmap metadata (IntendedFor, B0FieldIdentifier, TE, etc)

    For every `sub-*.nii*` image in the session's `fmap` folder the json sidecar file is loaded, its 'IntendedFor'
    search string (if any) is replaced by the list of matching nifti-files, the echo times of the magnitude1/2
    sidecars are copied to phasediff sidecars and the result is written back to disk. The acquisition times in
    the scans.tsv file are used to bound the IntendedFor search results. This is done after all datatypes have
    been processed, i.e. so that all target images are indeed on disk

    :param bidsses: The session folder with the BIDS session data
    :param subid:   The subject 'sub-label' identifier
    :param sesid:   The session 'ses-label' identifier
    :return:        Nothing
    """

    # Nothing to do if there are no fieldmaps
    if not (bidsses / 'fmap').is_dir():
        return

    # Read the acquisition times (or use an empty table if there is no scans.tsv file)
    scans_tsv = bidsses / f"{subid}{bids.add_prefix('_', sesid)}_scans.tsv"
    if scans_tsv.is_file():
        scans_table = pd.read_csv(scans_tsv, sep='\t', index_col='filename')
    else:
        scans_table = pd.DataFrame(columns=['acq_time'], dtype='str')
        scans_table.index.name = 'filename'

    def acq_datetime(filename: str):
        """Returns the parsed acq_time of filename, or None if the scans table has no usable (string) value for it"""
        if filename not in scans_table.index:
            return None
        acqtime = scans_table.loc[filename, 'acq_time']
        return dateutil.parser.parse(acqtime) if isinstance(
            acqtime, str) else None

    fmaps = [
        fmap.relative_to(bidsses).as_posix()
        for fmap in sorted((bidsses / 'fmap').glob('sub-*.nii*'))
    ]
    for fmap in fmaps:

        # Check if there are multiple runs and get the lower- and upperbound from the AcquisitionTime
        runindex = bids.get_bidsvalue(fmap, 'run')
        if runindex:
            prevfmap = bids.get_bidsvalue(fmap, 'run', int(runindex) - 1)
            nextfmap = bids.get_bidsvalue(fmap, 'run', int(runindex) + 1)
        else:
            prevfmap = nextfmap = None  # No run entity -> no neighbouring runs (int('') would raise a ValueError)
        fmaptime = acq_datetime(fmap) or dateutil.parser.parse('1925-01-01')
        lowerbound = fmaptime.replace(hour=0, minute=0, second=0)
        upperbound = fmaptime.replace(hour=23, minute=59, second=59)
        if prevfmap in fmaps:
            lowerbound = acq_datetime(prevfmap) or lowerbound
        if nextfmap in fmaps:
            upperbound = acq_datetime(nextfmap) or upperbound

        # Load the existing meta-data
        jsonfile = bidsses / Path(fmap).with_suffix('').with_suffix('.json')
        with jsonfile.open('r') as json_fid:
            jsondata = json.load(json_fid)

        # Search for the imaging files that match the IntendedFor search criteria
        intendedfor = jsondata.get('IntendedFor')
        if intendedfor and isinstance(intendedfor, str):

            # Search with multiple patterns for matching nifti-files in all runs and store the relative path to the session folder
            # NOTE(review): the test below accepts single angle brackets but two characters are stripped from each side,
            # i.e. the value seems to be expected in the form '<<pattern1><pattern2>>' -- confirm the intended format
            niifiles = []
            if intendedfor.startswith('<') and intendedfor.endswith('>'):
                intendedfor = intendedfor[2:-2].split('><')
            else:
                intendedfor = [intendedfor]
            for part in intendedfor:
                limits = part.split(':', 1)[1].strip(
                ) if ':' in part else ''  # part = 'pattern: [lowerlimit:upperlimit]'
                pattern = part.split(':', 1)[0].strip()
                matches = [
                    niifile.relative_to(bidsses).as_posix()
                    for niifile in sorted(bidsses.rglob(f"*{pattern}*.nii*"))
                    if pattern
                ]
                if limits and matches:
                    limits = limits[1:-1].split(
                        ':', 1
                    )  # limits: '[lowerlimit:upperlimit]' -> ['lowerlimit', 'upperlimit']
                    lowerlimit = int(
                        limits[0]) if limits[0].strip() else float('-inf')
                    upperlimit = int(
                        limits[1]) if limits[1].strip() else float('inf')
                    acqtimes = []
                    for match in matches:
                        matchtime = acq_datetime(match)
                        if matchtime is None:  # Images without a time stamp cannot be ordered relative to the fieldmap
                            LOGGER.warning(
                                f"No acquisition time found for {match} in {scans_tsv}, skipping it in the IntendedFor search of {jsonfile}"
                            )
                            continue
                        acqtimes.append(
                            (matchtime, match)
                        )  # Time + filepath relative to the session-folder
                    acqtimes.sort(key=lambda acqtime: acqtime[0])
                    offset = sum(
                        acqtime[0] < fmaptime for acqtime in acqtimes
                    )  # The nr of preceding series
                    for n, acqtime in enumerate(acqtimes):
                        if lowerbound < acqtime[
                                0] < upperbound and lowerlimit <= n - offset < upperlimit:
                            niifiles.append(acqtime[1])
                else:
                    niifiles.extend(matches)

            # Add the IntendedFor data. NB: The paths need to use forward slashes and be relative to the subject folder
            if niifiles:
                LOGGER.info(f"Adding IntendedFor to: {jsonfile}")
                jsondata['IntendedFor'] = [
                    (Path(sesid) / niifile).as_posix() for niifile in niifiles
                ]
            else:
                LOGGER.warning(
                    f"Empty 'IntendedFor' fieldmap value in {jsonfile}: the search for {intendedfor} gave no results"
                )
                jsondata['IntendedFor'] = None

        elif not (jsondata.get('B0FieldSource')
                  or jsondata.get('B0FieldIdentifier')):
            LOGGER.warning(
                f"Empty IntendedFor / B0FieldSource / B0FieldIdentifier fieldmap values in {jsonfile} (i.e. the fieldmap may not be used)"
            )

        # Work-around because the bids-validator (v1.8) cannot handle `null` values / unused IntendedFor fields
        if not jsondata.get('IntendedFor'):
            jsondata.pop('IntendedFor', None)

        # Extract the echo times from magnitude1 and magnitude2 and add them to the phasediff json-file
        if jsonfile.name.endswith('phasediff.json'):
            json_magnitude = [None, None]
            echotime = [None, None]
            for n in (0, 1):
                json_magnitude[n] = jsonfile.parent / jsonfile.name.replace(
                    '_phasediff', f"_magnitude{n+1}")
                if not json_magnitude[n].is_file():
                    LOGGER.error(
                        f"Could not find expected magnitude{n+1} image associated with: {jsonfile}"
                    )
                else:
                    with json_magnitude[n].open('r') as json_fid:
                        data = json.load(json_fid)
                    echotime[n] = data.get('EchoTime')
            jsondata['EchoTime1'] = jsondata['EchoTime2'] = None  # Stays None (null) if no valid echo times are found
            if None in echotime:
                LOGGER.error(
                    f"Cannot find and add valid EchoTime1={echotime[0]} and EchoTime2={echotime[1]} data to: {jsonfile}"
                )
            elif echotime[0] > echotime[1]:
                LOGGER.error(
                    f"Found invalid EchoTime1={echotime[0]} > EchoTime2={echotime[1]} for: {jsonfile}"
                )
            else:
                jsondata['EchoTime1'] = echotime[0]
                jsondata['EchoTime2'] = echotime[1]
                LOGGER.info(
                    f"Adding EchoTime1: {echotime[0]} and EchoTime2: {echotime[1]} to {jsonfile}"
                )

        # Save the collected meta-data to disk
        with jsonfile.open('w') as json_fid:
            json.dump(jsondata, json_fid, indent=4)
Example #4
0
def echocombine(bidsdir: str,
                pattern: str,
                subjects: list,
                output: str,
                algorithm: str,
                weights: list,
                force: bool = False):
    """
    Combines the multi-echo images that are selected with the search pattern into a single image per echo-series
    and updates the IntendedFor fields of the fieldmap sidecar files and the scans.tsv file accordingly

    :param bidsdir:     The bids-directory with the (multi-echo) subject data
    :param pattern:     Globlike recursive search pattern (relative to the subject/session folder) to select the first echo of the images that need to be combined, e.g. '*task-*echo-1*'
    :param subjects:    List of sub-# identifiers to be processed (the sub- prefix can be left out). If not specified then all sub-folders in the bidsfolder will be processed
    :param output:      Determines where the output is saved. It can be the name of a BIDS datatype folder, such as 'func', or of the derivatives folder, i.e. 'derivatives'. If output = [the name of the input datatype folder] then the original echo images are replaced by one combined image. If output is left empty then the combined image is saved in the input datatype folder and the original echo images are moved to the bids.unknowndatatype folder
    :param algorithm:   Combination algorithm, either 'PAID', 'TE' or 'average'
    :param weights:     Weights for each echo
    :param force:       Boolean to overwrite existing ME target files
    :return:
    """

    # Input checking
    bidsdir = Path(bidsdir).resolve()

    # Start logging
    bids.setup_logging(bidsdir / 'code' / 'bidscoin' / 'echocombine.log')
    LOGGER.info('')
    LOGGER.info("--------- START echocombine ---------")
    LOGGER.info(
        f">>> echocombine bidsfolder={bidsdir} pattern={pattern} subjects={subjects} output={output}"
        f" algorithm={algorithm} weights={weights}")

    if 'echo' not in pattern:
        LOGGER.warning(
            f"Missing 'echo-#' substring in glob-like search pattern, i.e. '{pattern}' does not seem to select the first echo"
        )

    # Get the list of subjects
    if not subjects:
        subjects = bids.lsdirs(bidsdir, 'sub-*')
        if not subjects:
            LOGGER.warning(f"No subjects found in: {bidsdir/'sub-*'}")
    else:
        # Make sure there is exactly one "sub-" prefix. NB: str.replace() does not support regular expressions,
        # i.e. replacing '^sub-' never matches and would turn 'sub-01' into 'sub-sub-01'
        subjects = [
            subject if subject.startswith('sub-') else 'sub-' + subject
            for subject in subjects
        ]
        subjects = [
            bidsdir / subject for subject in subjects
            if (bidsdir / subject).is_dir()
        ]

    # Loop over bids subject/session-directories
    for n, subject in enumerate(subjects, 1):

        sessions = bids.lsdirs(subject, 'ses-*')
        if not sessions:
            sessions = [subject]
        for session in sessions:

            LOGGER.info('-------------------------------------')
            LOGGER.info(
                f"Combining echos for ({n}/{len(subjects)}): {session}")

            sub_id, ses_id = bids.get_subid_sesid(session / 'dum.my')

            # Search for multi-echo matches
            for match in sorted([
                    match for match in session.rglob(pattern)
                    if '.nii' in match.suffixes
            ]):

                # Check if it is normal/BIDS multi-echo data
                datatype = match.parent.name
                echonr = bids.get_bidsvalue(match, 'echo')
                mepattern = bids.get_bidsvalue(match, 'echo', '*')
                echos = sorted(match.parent.glob(mepattern.name))
                newechos = [
                    echo.parents[1] / bids.unknowndatatype / echo.name
                    for echo in echos
                ]
                if not echonr:
                    LOGGER.warning(
                        f"No 'echo' key-value pair found in the filename, skipping: {match}"
                    )
                    continue
                if len(echos) == 1:
                    LOGGER.warning(
                        f"Only one echo image found, nothing to do for: {match}"
                    )
                    continue

                # Construct the combined-echo output filename and check if that file already exists
                cename = match.name.replace(f"_echo-{echonr}", '')
                if not output:
                    cefile = session / datatype / cename
                elif output == 'derivatives':
                    cefile = bidsdir / 'derivatives' / 'multiecho' / sub_id / ses_id / datatype / cename
                else:
                    cefile = session / output / cename
                cefile.parent.mkdir(parents=True, exist_ok=True)
                if cefile.is_file() and not force:
                    LOGGER.warning(
                        f"Outputfile {cefile} already exists, skipping: {match}"
                    )
                    continue

                # Combine the multi-echo images
                me.me_combine(mepattern,
                              cefile,
                              algorithm,
                              weights,
                              saveweights=False,
                              logger=LOGGER.name)

                # (Re)move the original multi-echo images
                if not output:
                    for echo, newecho in zip(echos, newechos):
                        LOGGER.info(
                            f"Moving original echo image: {echo} -> {newecho}")
                        newecho.parent.mkdir(parents=True, exist_ok=True)
                        echo.replace(newecho)
                        echo.with_suffix('').with_suffix('.json').replace(
                            newecho.with_suffix('').with_suffix('.json'))
                elif output == datatype:
                    for echo in echos:
                        LOGGER.info(f"Removing original echo image: {echo}")
                        echo.unlink()
                        echo.with_suffix('').with_suffix('.json').unlink()

                # Construct relative path names as they are used in BIDS, i.e. with forward slashes on all platforms
                echos_rel = [
                    echo.relative_to(session).as_posix() for echo in echos
                ]
                newechos_rel = [
                    echo.relative_to(session).as_posix() for echo in newechos
                ]
                if output != 'derivatives':
                    cefile_rel = cefile.relative_to(session).as_posix()

                # Update the IntendedFor fields in the fieldmap sidecar files (i.e. remove the old echos, add the echo-combined image and, optionally, the new echos)
                if output != 'derivatives' and (session / 'fmap').is_dir():
                    for fmap in (session / 'fmap').glob('*.json'):
                        with fmap.open('r') as fmap_fid:
                            fmap_data = json.load(fmap_fid)
                        if 'IntendedFor' in fmap_data:
                            intendedfor = fmap_data['IntendedFor']
                            if isinstance(intendedfor, str):
                                intendedfor = [intendedfor]
                            if echos_rel[0] in intendedfor:
                                LOGGER.info(
                                    f"Updating 'IntendedFor' to {cefile_rel} in {fmap}"
                                )
                                if not output:
                                    intendedfor = [
                                        file for file in intendedfor
                                        if file not in echos_rel
                                    ] + [cefile_rel] + newechos_rel
                                elif output == datatype:
                                    intendedfor = [
                                        file for file in intendedfor
                                        if file not in echos_rel
                                    ] + [cefile_rel]
                                else:
                                    intendedfor = intendedfor + [cefile_rel]
                                fmap_data['IntendedFor'] = intendedfor
                                with fmap.open('w') as fmap_fid:
                                    json.dump(fmap_data, fmap_fid, indent=4)

                # Update the scans.tsv file (NB: 'derivatives/' is always ignored, hence cefile_rel is always defined below)
                if (bidsdir / '.bidsignore').is_file():
                    with (bidsdir / '.bidsignore').open('r') as fid:
                        bidsignore = fid.read().splitlines()
                else:
                    bidsignore = [bids.unknowndatatype + '/']
                bidsignore.append('derivatives/')
                scans_tsv = session / f"{sub_id}{bids.add_prefix('_',ses_id)}_scans.tsv"
                if output + '/' not in bidsignore and scans_tsv.is_file():

                    LOGGER.info(f"Adding {cefile_rel} to {scans_tsv}")
                    scans_table = pd.read_csv(scans_tsv,
                                              sep='\t',
                                              index_col='filename')
                    scans_table.loc[cefile_rel] = scans_table.loc[echos_rel[0]]

                    for echo, newecho in zip(echos_rel, newechos_rel):
                        if not output:
                            LOGGER.info(
                                f"Updating {echo} -> {newecho} in {scans_tsv}")
                            scans_table.loc[newecho] = scans_table.loc[echo]
                            scans_table.drop(echo, inplace=True)
                        elif output == datatype:
                            LOGGER.info(f"Removing {echo} from {scans_tsv}")
                            scans_table.drop(echo, inplace=True)

                    scans_table.sort_values(by=['acq_time', 'filename'],
                                            inplace=True)
                    scans_table.to_csv(scans_tsv, sep='\t', encoding='utf-8')

    LOGGER.info('-------------- FINISHED! -------------')
    LOGGER.info('')
Example #5
0
def coin_dicom(session: Path, bidsmap: dict, bidsfolder: Path, personals: dict,
               subprefix: str, sesprefix: str) -> None:
    """
    Converts the session dicom-files into BIDS-valid nifti-files in the corresponding bidsfolder and
    extracts personals (e.g. Age, Sex) from the dicom header

    Processing steps:
      1. Derive the subject/session identifiers and create the BIDS session folder
      2. For every run subfolder: match it against the bidsmap, convert it with dcm2niix, rename the
         files that got a dcm2niix suffix (_c, _e, _ph, _i, _Crop_), patch the json sidecar files and
         add the acquisition time to the scans table
      3. Write the scans.tsv file
      4. Add the IntendedFor values to the fieldmap json-files
      5. Store the personal data in the (caller-owned) personals dictionary

    :param session:     The full-path name of the subject/session source folder
    :param bidsmap:     The full mapping heuristics from the bidsmap YAML-file
    :param bidsfolder:  The full-path name of the BIDS root-folder
    :param personals:   The dictionary with the personal information (updated in place)
    :param subprefix:   The prefix common for all source subject-folders
    :param sesprefix:   The prefix common for all source session-folders
    :return:            Nothing
    """

    # List the run subfolders once: they are needed for the identifiers, for the conversion and for the personals
    runfolders = bids.lsdirs(session)
    if not runfolders:
        LOGGER.warning(f"No run subfolder(s) found in: {session}")
        return

    # The echo times collected from the fmap magnitude json-files (added to the phasediff json-file later on)
    TE = [None, None]

    # Get valid BIDS subject/session identifiers from the (first) dicom-header or from the session source folder
    subid, sesid = bids.get_subid_sesid(
        bids.get_dicomfile(runfolders[0]),
        bidsmap['DICOM']['subject'], bidsmap['DICOM']['session'], subprefix,
        sesprefix)
    if subid == subprefix:
        LOGGER.error(f"No valid subject identifier found for: {session}")
        return

    # Create the BIDS session-folder and a scans.tsv file
    bidsses = bidsfolder / subid / sesid
    if bidsses.is_dir():
        LOGGER.warning(
            f"Existing BIDS output-directory found, which may result in duplicate data (with increased run-index). Make sure {bidsses} was cleaned-up from old data before (re)running the bidscoiner"
        )
    bidsses.mkdir(parents=True, exist_ok=True)
    scans_tsv = bidsses / f"{subid}{bids.add_prefix('_',sesid)}_scans.tsv"
    if scans_tsv.is_file():
        scans_table = pd.read_csv(scans_tsv, sep='\t', index_col='filename')
    else:
        scans_table = pd.DataFrame(columns=['acq_time'], dtype='str')
        scans_table.index.name = 'filename'

    # Process all the dicom run subfolders
    for runfolder in runfolders:

        # Get a dicom-file
        dicomfile = bids.get_dicomfile(runfolder)
        if not dicomfile.name:
            continue

        # Get a matching run from the bidsmap
        run, modality, index = bids.get_matching_run(dicomfile, bidsmap)

        # Check if we should ignore this run
        if modality == bids.ignoremodality:
            LOGGER.info(f"Leaving out: {runfolder}")
            continue

        # Check if we already know this run
        if index is None:
            LOGGER.warning(
                f"Skipping unknown '{modality}': {dicomfile}\n-> re-run the bidsmapper and delete {session} to solve this warning"
            )
            continue

        LOGGER.info(f"Processing: {runfolder}")

        # Create the BIDS session/modality folder
        bidsmodality = bidsses / modality
        bidsmodality.mkdir(parents=True, exist_ok=True)

        # Compose the BIDS filename using the matched run
        bidsname = bids.get_bidsname(subid, sesid, modality, run)
        runindex = run['bids']['run']
        if runindex.startswith('<<') and runindex.endswith('>>'):
            bidsname = bids.increment_runindex(bidsmodality, bidsname)

        # Check if file already exists (-> e.g. when a static runindex is used)
        if (bidsmodality / bidsname).with_suffix('.json').is_file():
            LOGGER.warning(
                f"{bidsmodality/bidsname}.* already exists -- check your results carefully!"
            )

        # Convert the dicom-files in the run folder to nifti's in the BIDS-folder
        command = '{path}dcm2niix {args} -f "{filename}" -o "{outfolder}" "{infolder}"'.format(
            path=bidsmap['Options']['dcm2niix']['path'],
            args=bidsmap['Options']['dcm2niix']['args'],
            filename=bidsname,
            outfolder=bidsmodality,
            infolder=runfolder)
        if not bids.run_command(command):
            continue

        # Replace uncropped output image with the cropped one
        if '-x y' in bidsmap['Options']['dcm2niix']['args']:
            for filename in sorted(bidsmodality.glob(bidsname + '*_Crop_*')):   # e.g. *_Crop_1.nii.gz
                ext = ''.join(filename.suffixes)
                newfilename = str(filename).rsplit(ext, 1)[0].rsplit('_Crop_', 1)[0] + ext
                LOGGER.info(
                    f"Found dcm2niix _Crop_ suffix, replacing original file\n{filename} ->\n{newfilename}"
                )
                filename.replace(newfilename)

        # Rename all files ending with _c%d, _e%d and _ph (and any combination of these): These are produced by dcm2niix for multi-coil data, multi-echo data and phase data, respectively
        jsonfiles = []      # Collect the associated json-files (for updating them later) -- possibly > 1
        for dcm2niisuffix in ('_c', '_e', '_ph', '_i'):
            for filename in sorted(bidsmodality.glob(bidsname + dcm2niisuffix + '*')):
                ext = ''.join(filename.suffixes)

                # basepath = the name without the added stuff (i.e. bidsmodality/bidsname), index = added dcm2niix index (e.g. _c1 -> index=1)
                # NB: From here on `index` holds the dcm2niix index (the bidsmap index from get_matching_run is no longer needed)
                basepath, index = str(filename).rsplit(ext, 1)[0].rsplit(dcm2niisuffix, 1)

                # The BIDS suffix, e.g. basepath = *_magnitude1 -> basesuffix=magnitude1
                basesuffix = basepath.rsplit('_', 1)[1]

                # Zero pad as specified in the BIDS-standard (assuming two digits is sufficient); strip following suffices (fieldmaps produce *_e2_ph files)
                index = index.split('_')[0].zfill(2)

                # This is a special hack: dcm2niix does not always add a _c/_e suffix for the first(?) coil/echo image -> add it when we encounter a **_e2/_c2 file
                # For fieldmaps: *_magnitude1_e[index] -> *_magnitude[index] (This is handled below)
                if dcm2niisuffix in ('_c', '_e') and int(index) == 2 and basesuffix not in ['magnitude1', 'phase1']:
                    filename_ce = Path(basepath + ext)      # The file without the _c1/_e1 suffix
                    if dcm2niisuffix == '_e' and bids.get_bidsvalue(basepath, 'echo'):
                        newbasepath_ce = Path(bids.get_bidsvalue(basepath, 'echo', '1'))
                    else:
                        # --> append to acq-label, may need to be elaborated for future BIDS standards, supporting multi-coil data
                        newbasepath_ce = Path(
                            bids.get_bidsvalue(basepath, 'dummy', dcm2niisuffix.upper() + '1'.zfill(len(index))))
                    newfilename_ce = newbasepath_ce.with_suffix(ext)    # The file as it should have been
                    if filename_ce.is_file():
                        if filename_ce != newfilename_ce:
                            LOGGER.info(
                                f"Found no dcm2niix {dcm2niisuffix} suffix for image instance 1, renaming\n{filename_ce} ->\n{newfilename_ce}"
                            )
                            filename_ce.replace(newfilename_ce)
                        if ext == '.json':
                            jsonfiles.append(newbasepath_ce.with_suffix('.json'))

                # Patch the basepath with the dcm2niix suffix info (we can't rely on the basepath info here because Siemens can e.g. put multiple echos in one series / run-folder)
                if dcm2niisuffix == '_e' and bids.get_bidsvalue(basepath, 'echo') and index:
                    # In contrast to other labels, run and echo labels MUST be integers. Those labels MAY include zero padding, but this is NOT RECOMMENDED to maintain their uniqueness
                    basepath = bids.get_bidsvalue(basepath, 'echo', str(int(index)))

                elif dcm2niisuffix == '_e' and basesuffix in ('magnitude1', 'magnitude2') and index:    # i.e. modality == 'fmap'
                    basepath = basepath[0:-1] + str(int(index))     # basepath: *_magnitude1_e[index] -> *_magnitude[index]
                    # Collect the echo times that need to be added to the json-file (see below)
                    if filename.suffix == '.json':
                        with filename.open('r') as json_fid:
                            data = json.load(json_fid)
                        TE[int(index) - 1] = data['EchoTime']
                        LOGGER.info(
                            f"Collected EchoTime{index} = {data['EchoTime']} from: {filename}"
                        )

                elif dcm2niisuffix == '_e' and basesuffix == 'phasediff' and index:     # i.e. modality == 'fmap'
                    pass

                elif dcm2niisuffix == '_e' and basesuffix in ['phase1', 'phase2'] and index:    # i.e. modality == 'fmap'
                    basepath = basepath[0:-1] + str(int(index))     # basepath: *_phase1_e[index]_ph -> *_phase[index]

                else:
                    # --> append to acq-label, may need to be elaborated for future BIDS standards, supporting multi-coil data
                    basepath = bids.get_bidsvalue(basepath, 'dummy', dcm2niisuffix.upper() + index)

                # Save the file with a new name
                newbidsname = str(Path(basepath).name)
                if runindex.startswith('<<') and runindex.endswith('>>'):
                    # Update the runindex now that the acq-label has changed
                    newbidsname = bids.increment_runindex(bidsmodality, newbidsname, ext)
                newfilename = (bidsmodality / newbidsname).with_suffix(ext)
                LOGGER.info(
                    f"Found dcm2niix {dcm2niisuffix} suffix, renaming\n{filename} ->\n{newfilename}"
                )
                filename.replace(newfilename)
                if ext == '.json':
                    jsonfiles.append((bidsmodality / newbidsname).with_suffix('.json'))

        # Loop over and adapt all the newly produced json files and write to the scans.tsv file (every nifti-file comes with a json-file)
        if not jsonfiles:
            jsonfiles = [(bidsmodality / bidsname).with_suffix('.json')]
        for jsonfile in set(jsonfiles):

            # Check if dcm2niix behaved as expected
            if not jsonfile.is_file():
                LOGGER.error(
                    f"Unexpected file conversion result: {jsonfile} not found")
                continue

            # Add a dummy b0 bval- and bvec-file for any file without a bval/bvec file (e.g. sbref, b0 scans)
            if modality == 'dwi':
                bvecfile = jsonfile.with_suffix('.bvec')
                bvalfile = jsonfile.with_suffix('.bval')
                if not bvecfile.is_file():
                    LOGGER.info(f"Adding dummy bvec file: {bvecfile}")
                    with bvecfile.open('w') as bvec_fid:
                        bvec_fid.write('0\n0\n0\n')
                if not bvalfile.is_file():
                    LOGGER.info(f"Adding dummy bval file: {bvalfile}")
                    with bvalfile.open('w') as bval_fid:
                        bval_fid.write('0\n')

            # Add the TaskName to the func json-file
            elif modality == 'func':
                with jsonfile.open('r') as json_fid:
                    data = json.load(json_fid)
                if 'TaskName' not in data:
                    LOGGER.info(f"Adding TaskName to: {jsonfile}")
                    data['TaskName'] = run['bids']['task']
                    with jsonfile.open('w') as json_fid:
                        json.dump(data, json_fid, indent=4)

            # Add the EchoTime(s) used to create the difference image to the fmap json-file. NB: This assumes the magnitude runs have already been parsed (i.e. their nifti's had an _e suffix) -- This is normally the case for Siemens (phase-runs being saved after the magnitude runs)
            elif modality == 'fmap':
                if run['bids']['suffix'] == 'phasediff':
                    LOGGER.info(
                        f"Adding EchoTime1: {TE[0]} and EchoTime2: {TE[1]} to {jsonfile}"
                    )
                    if TE[0] is None or TE[1] is None:
                        LOGGER.warning(
                            f"Missing Echo-Time data for: {jsonfile}")
                    elif TE[0] > TE[1]:
                        LOGGER.warning(
                            f"EchoTime1 > EchoTime2 for: {jsonfile}")
                    with jsonfile.open('r') as json_fid:
                        data = json.load(json_fid)
                    data['EchoTime1'] = TE[0]
                    data['EchoTime2'] = TE[1]
                    with jsonfile.open('w') as json_fid:
                        json.dump(data, json_fid, indent=4)

            # Parse the acquisition time from the json file or else from the dicom header (NB: assuming the dicom file represents the first acquisition)
            with jsonfile.open('r') as json_fid:
                data = json.load(json_fid)
            if 'AcquisitionTime' not in data:
                data['AcquisitionTime'] = bids.get_dicomfield(
                    'AcquisitionTime', dicomfile)
            acq_time = dateutil.parser.parse(data['AcquisitionTime'])

            # Find the corresponding nifti file (there should be only one, let's not make assumptions about the .gz extension)
            niimatches = list(jsonfile.parent.glob(jsonfile.stem + '.nii*'))
            if not niimatches:
                # A sidecar file without an image cannot be listed in the scans table
                LOGGER.error(
                    f"Unexpected file conversion result: no nifti-file found for {jsonfile}")
                continue
            scanpath = niimatches[0].relative_to(bidsses)
            scans_table.loc[
                scanpath.as_posix(),
                'acq_time'] = '1900-01-01T' + acq_time.strftime('%H:%M:%S')

    # Write the scans_table to disk
    LOGGER.info(f"Writing acquisition time data to: {scans_tsv}")
    scans_table.sort_values(by=['acq_time', 'filename'], inplace=True)
    scans_table.to_csv(scans_tsv, sep='\t', encoding='utf-8')

    # Search for the IntendedFor images and add them to the json-files. This has been postponed until all modalities have been processed (i.e. so that all target images are indeed on disk)
    if bidsmap['DICOM']['fmap'] is not None:
        for fieldmap in bidsmap['DICOM']['fmap']:
            bidsname = bids.get_bidsname(subid, sesid, 'fmap', fieldmap)
            niifiles = []
            intendedfor = fieldmap['bids']['IntendedFor']

            # Search for the imaging files that match the IntendedFor search criteria
            if intendedfor:
                # NB: Test for a string first, a list of selectors has no startswith/endswith methods
                if isinstance(intendedfor, str) and intendedfor.startswith('<<') and intendedfor.endswith('>>'):
                    intendedfor = intendedfor[2:-2].split('><')
                elif not isinstance(intendedfor, list):
                    intendedfor = [intendedfor]
                for selector in intendedfor:
                    # Search in all runs using a relative path to the subject folder
                    niifiles.extend(
                        [
                            Path(niifile).relative_to(bidsfolder / subid)
                            for niifile in sorted(
                                bidsses.rglob(f"*{selector}*.nii*"))
                            if selector
                        ]
                    )
            else:
                intendedfor = []

            # Save the IntendedFor data in the json-files (account for multiple runs and dcm2niix suffixes inserted into the acquisition label)
            acqlabel = bids.get_bidsvalue(bidsname, 'acq')
            runpattern = bidsname.replace('_run-1_', '_run-[0-9]*_')
            fmapjsons = list((bidsses/'fmap').glob(runpattern + '.json'))
            if acqlabel:
                # Without an acq-label there is nothing to replace (str.replace('', x) would insert x between every character)
                fmapjsons += list((bidsses/'fmap').glob(runpattern.replace(acqlabel, acqlabel+'[CE][0-9]*') + '.json'))
            for jsonfile in fmapjsons:

                if niifiles:
                    LOGGER.info(f"Adding IntendedFor to: {jsonfile}")
                elif intendedfor:
                    LOGGER.warning(
                        f"Empty 'IntendedFor' fieldmap value in {jsonfile}: the search for {intendedfor} gave no results"
                    )
                else:
                    LOGGER.warning(
                        f"Empty 'IntendedFor' fieldmap value in {jsonfile}: the IntendedFor value of the bidsmap entry was empty"
                    )
                with jsonfile.open('r') as json_fid:
                    data = json.load(json_fid)
                # The path needs to use forward slashes instead of backward slashes
                data['IntendedFor'] = [
                    niifile.as_posix() for niifile in niifiles
                ]
                with jsonfile.open('w') as json_fid:
                    json.dump(data, json_fid, indent=4)

                # Catch magnitude2 and phase2 files produced by dcm2niix (i.e. magnitude1 & magnitude2 both in the same runfolder)
                if jsonfile.name.endswith(('magnitude1.json', 'phase1.json')):
                    jsonfile2 = jsonfile.with_name(
                        jsonfile.name.rsplit('1.json', 1)[0] + '2.json')
                    if jsonfile2.is_file():
                        with jsonfile2.open('r') as json_fid:
                            data = json.load(json_fid)
                        if 'IntendedFor' not in data:
                            if niifiles:
                                LOGGER.info(
                                    f"Adding IntendedFor to: {jsonfile2}")
                            else:
                                LOGGER.warning(
                                    f"Empty 'IntendedFor' fieldmap value in {jsonfile2}: the search for {intendedfor} gave no results"
                                )
                            # The path needs to use forward slashes instead of backward slashes
                            data['IntendedFor'] = [
                                niifile.as_posix() for niifile in niifiles
                            ]
                            with jsonfile2.open('w') as json_fid:
                                json.dump(data, json_fid, indent=4)

    # Collect personal data from the DICOM header (of the last run folder): only from the first session (-> BIDS specification)
    dicomfile = bids.get_dicomfile(runfolders[-1])
    personals['participant_id'] = subid
    if sesid:
        if 'session_id' not in personals:
            personals['session_id'] = sesid
        else:
            return      # The personals of an earlier session have already been stored
    # A string of characters with one of the following formats: nnnD, nnnW, nnnM, nnnY
    age = bids.get_dicomfield('PatientAge', dicomfile)
    if age.endswith('D'):
        personals['age'] = str(int(float(age.rstrip('D')) / 365.2524))
    elif age.endswith('W'):
        personals['age'] = str(int(float(age.rstrip('W')) / 52.1775))
    elif age.endswith('M'):
        personals['age'] = str(int(float(age.rstrip('M')) / 12))
    elif age.endswith('Y'):
        personals['age'] = str(int(float(age.rstrip('Y'))))
    elif age:
        personals['age'] = age
    personals['sex'] = bids.get_dicomfield('PatientSex', dicomfile)
    personals['size'] = bids.get_dicomfield('PatientSize', dicomfile)
    personals['weight'] = bids.get_dicomfield('PatientWeight', dicomfile)
def echocombine(bidsdir: str, pattern: str, subjects: list, output: str, algorithm: str, weights: list, force: bool=False):
    """
    Combines the multi-echo images of the selected subjects / sessions into single images and keeps the
    fieldmap 'IntendedFor' fields and the scans.tsv files in sync with the resulting file layout

    :param bidsdir:     The bids-directory with the (multi-echo) subject data
    :param pattern:     Globlike recursive search pattern (relative to the subject/session folder) to select the first echo of the images that need to be combined, e.g. '*task-*echo-1*'
    :param subjects:    List of sub-# identifiers to be processed (the sub- prefix can be left out). If not specified then all sub-folders in the bidsfolder will be processed
    :param output:      Determines where the output is saved. It can be the name of a BIDS datatype folder, such as 'func', or of the derivatives folder, i.e. 'derivatives'. If output = [the name of the input datatype folder] then the original echo images are replaced by one combined image. If output is left empty then the combined image is saved in the input datatype folder and the original echo images are moved to the `unknowndatatype` folder
    :param algorithm:   Combination algorithm, either 'PAID', 'TE' or 'average'
    :param weights:     Weights for each echo
    :param force:       Boolean to overwrite existing ME target files
    :return:            Nothing
    """

    # Input checking
    bidsdir = Path(bidsdir).resolve()

    # Start logging
    bidscoin.setup_logging(bidsdir/'code'/'bidscoin'/'echocombine.log')
    LOGGER.info('')
    LOGGER.info("--------- START echocombine ---------")
    LOGGER.info(f">>> echocombine bidsfolder={bidsdir} pattern={pattern} subjects={subjects} output={output}"
                f" algorithm={algorithm} weights={weights}")

    # Get the list of subjects
    if not subjects:
        subjects = bidscoin.lsdirs(bidsdir, 'sub-*')
        if not subjects:
            LOGGER.warning(f"No subjects found in: {bidsdir/'sub-*'}")
    else:
        subjects = ['sub-' + subject.replace('sub-', '') for subject in subjects]              # Make sure there is a "sub-" prefix
        subjects = [bidsdir/subject for subject in subjects if (bidsdir/subject).is_dir()]

    # Loop over bids subject/session-directories
    with logging_redirect_tqdm():
        for n, subject in enumerate(tqdm(subjects, unit='subject', leave=False), 1):

            sessions = bidscoin.lsdirs(subject, 'ses-*')
            if not sessions:
                sessions = [subject]
            for session in sessions:

                LOGGER.info('-------------------------------------')
                LOGGER.info(f"Combining echos for ({n}/{len(subjects)}): {session}")

                subid, sesid = bids.DataSource(session/'dum.my', subprefix='sub-', sesprefix='ses-').subid_sesid()

                # Search for multi-echo matches
                for match in sorted([match for match in session.rglob(pattern) if '.nii' in match.suffixes]):

                    # Check if it is normal/BIDS multi-echo data or that the echo-number is appended to the acquisition label (as done in BIDScoin)
                    if '_echo-' in match.name:
                        echonr      = bids.get_bidsvalue(match, 'echo')
                        mepattern   = bids.get_bidsvalue(match, 'echo', '*')                        # The pattern that selects all echos
                        cename      = match.name.replace(f"_echo-{echonr}", '')                     # The combined-echo output filename
                    elif '_acq-' in match.name and bids.get_bidsvalue(match, 'acq').split('e')[-1].isnumeric():
                        acq, echonr = bids.get_bidsvalue(match, 'acq').rsplit('e',1)
                        mepattern   = bids.get_bidsvalue(match, 'acq', acq + 'e*')                  # The pattern that selects all echos
                        cename      = match.name.replace(f"_acq-{acq}e{echonr}", f"_acq-{acq}")     # The combined-echo output filename
                        LOGGER.info(f"No 'echo' key-value pair found in the filename, using the 'acq-{acq}e{echonr}' pair instead (BIDScoin-style)")
                    else:
                        LOGGER.warning(f"No 'echo' encoding found in the filename, skipping: {match}")
                        continue
                    echos     = sorted(match.parent.glob(mepattern.name))
                    newechos  = [echo.parents[1]/unknowndatatype/echo.name for echo in echos]      # Where the echos go if they are moved out of the way
                    if len(echos) == 1:
                        LOGGER.warning(f"Only one echo image found, nothing to do for: {match}")
                        continue

                    # Construct the combined-echo output filename and check if that file already exists
                    datatype = match.parent.name
                    if not output:
                        cefile = session/datatype/cename
                    elif output == 'derivatives':
                        cefile = bidsdir/'derivatives'/'multiecho'/subid/sesid/datatype/cename
                    else:
                        cefile = session/output/cename
                    cefile.parent.mkdir(parents=True, exist_ok=True)
                    if cefile.is_file() and not force:
                        LOGGER.warning(f"Outputfile {cefile} already exists, skipping: {match}")
                        continue

                    # Combine the multi-echo images
                    me.me_combine(mepattern, cefile, algorithm, weights, saveweights=False)

                    # (Re)move the original multi-echo images
                    if not output:
                        for echo, newecho in zip(echos, newechos):
                            LOGGER.info(f"Moving original echo image: {echo} -> {newecho}")
                            newecho.parent.mkdir(parents=True, exist_ok=True)
                            echo.replace(newecho)
                            echo.with_suffix('').with_suffix('.json').replace(newecho.with_suffix('').with_suffix('.json'))
                    elif output == datatype:
                        for echo in echos:
                            LOGGER.info(f"Removing original echo image: {echo}")
                            echo.unlink()
                            echo.with_suffix('').with_suffix('.json').unlink()

                    # Construct the path names relative to the session folder (as in the scans.tsv file)
                    oldechos_rel = [echo.relative_to(session).as_posix() for echo in echos]
                    newechos_rel = [echo.relative_to(session).as_posix() for echo in echos + newechos if echo.is_file()]
                    if output == 'derivatives':
                        cefile_rel = ''                 # A remote folder cannot be specified as IntendedFor :-(
                    else:
                        cefile_rel = cefile.relative_to(session).as_posix()

                    # Update the IntendedFor fields of the fieldmaps (i.e. remove the old echos, add the echo-combined image and, optionally, the new echos)
                    if output != 'derivatives' and (session/'fmap').is_dir():
                        for fmap in (session/'fmap').glob('*.json'):
                            with fmap.open('r') as fmap_fid:
                                metadata = json.load(fmap_fid)
                            intendedfor = metadata.get('IntendedFor', [])
                            if isinstance(intendedfor, str):
                                intendedfor = [intendedfor]
                            if sesid:                   # NB: IntendedFor is relative to the subject folder
                                # Taking the last item leaves entries without a session prefix untouched (instead of raising an IndexError)
                                intendedfor = [file.split(sesid+'/',1)[-1] for file in intendedfor]
                            if oldechos_rel[0] in intendedfor:
                                LOGGER.info(f"Updating 'IntendedFor' in {fmap}")
                                relfiles                = [file for file in intendedfor if file not in oldechos_rel] + newechos_rel + [cefile_rel]
                                metadata['IntendedFor'] = [(Path(sesid)/relfile).as_posix() for relfile in relfiles]
                                with fmap.open('w') as fmap_fid:
                                    json.dump(metadata, fmap_fid, indent=4)

                    # Update the scans.tsv file
                    if (bidsdir/'.bidsignore').is_file():
                        bidsignore = (bidsdir/'.bidsignore').read_text().splitlines()
                    else:
                        bidsignore = [unknowndatatype + '/']
                    scans_tsv = session/f"{subid}{bids.add_prefix('_', sesid)}_scans.tsv"
                    if scans_tsv.is_file():

                        # Store the scan data of the first echo in a temporary 'oldrow' that serves as a template for the new rows
                        scans_table = pd.read_csv(scans_tsv, sep='\t', index_col='filename')
                        if oldechos_rel[0] in scans_table.index:
                            scans_table.loc['oldrow'] = scans_table.loc[oldechos_rel[0]]
                        elif 'acq_time' in scans_table:
                            with cefile.with_suffix('').with_suffix('.json').open('r') as fid:
                                metadata = json.load(fid)
                            # Borrow the date from the first scan. NB: An empty table or an empty (NaN) acq_time cannot provide a date and a sidecar file without AcquisitionTime cannot provide a time
                            firsttime = scans_table['acq_time'].iloc[0] if len(scans_table) else None
                            acqtime   = metadata.get('AcquisitionTime')
                            if isinstance(firsttime, str) and acqtime:
                                date = firsttime.split('T')[0]
                                scans_table.loc['oldrow', 'acq_time'] = f"{date}T{acqtime}"
                            else:
                                scans_table.loc['oldrow'] = None
                        else:
                            scans_table.loc['oldrow'] = None

                        if output+'/' not in bidsignore + ['derivatives/'] and cefile.parent.name in bids.bidsdatatypes:
                            LOGGER.info(f"Adding '{cefile_rel}' to '{scans_tsv}'")
                            scans_table.loc[cefile_rel] = scans_table.loc['oldrow']

                        for echo in oldechos_rel + newechos_rel:
                            if echo in scans_table.index and not (session/echo).is_file():
                                LOGGER.info(f"Removing '{echo}' from '{scans_tsv}'")
                                scans_table.drop(echo, inplace=True)
                            elif echo not in scans_table.index and (session/echo).is_file() and echo.split('/')[0] in bids.bidsdatatypes:
                                LOGGER.info(f"Adding '{echo}' to '{scans_tsv}'")
                                scans_table.loc[echo] = scans_table.loc['oldrow']       # NB: Assuming that the echo-rows are all identical

                        # Remove the temporary template row again before saving the table
                        scans_table.drop('oldrow', inplace=True)
                        scans_table.sort_values(by=['acq_time','filename'], inplace=True)
                        scans_table.replace('','n/a').to_csv(scans_tsv, sep='\t', encoding='utf-8', na_rep='n/a')
                        for scan in scans_table.index:
                            if not (session/scan).is_file():
                                LOGGER.warning(f"Found non-existent file '{scan}' in '{scans_tsv}'")

    LOGGER.info('-------------- FINISHED! -------------')
    LOGGER.info('')