Ejemplo n.º 1
0
def bidsmapper_plugin(session: Path, bidsmap_new: dict, bidsmap_old: dict,
                      template: dict, store: dict) -> None:
    """
    All the logic to map the DICOM/PAR source fields onto bids labels go into this function

    :param session:     The full-path name of the subject/session raw data source folder
    :param bidsmap_new: The new study bidsmap that we are building
    :param bidsmap_old: The previous study bidsmap that has precedence over the template bidsmap
    :param template:    The template bidsmap with the default heuristics
    :param store:       The paths of the source- and target-folder
    :return:
    """

    # Get started
    plugin = {
        'dcm2niix2bids': bidsmap_new['Options']['plugins']['dcm2niix2bids']
    }
    datasource = bids.get_datasource(session, plugin)
    dataformat = datasource.dataformat
    if not dataformat:
        return

    # Collect the different DICOM/PAR source files for all runs in the session
    sourcefiles = []
    if dataformat == 'DICOM':
        for sourcedir in bidscoin.lsdirs(session):
            for n in range(
                    1
            ):  # Option: Use range(2) to scan two files and catch e.g. magnitude1/2 fieldmap files that are stored in one Series folder (but bidscoiner sees only the first file anyhow and it makes bidsmapper 2x slower :-()
                sourcefile = bids.get_dicomfile(sourcedir, n)
                if sourcefile.name:
                    sourcefiles.append(sourcefile)
    elif dataformat == 'PAR':
        sourcefiles = bids.get_parfiles(session)
    else:
        LOGGER.exception(f"Unsupported dataformat '{dataformat}'")

    # Update the bidsmap with the info from the source files
    for sourcefile in sourcefiles:

        # Input checks
        if not sourcefile.name or (not template[dataformat]
                                   and not bidsmap_old[dataformat]):
            LOGGER.error(
                f"No {dataformat} source information found in the bidsmap and template for: {sourcefile}"
            )
            return

        # See if we can find a matching run in the old bidsmap
        datasource = bids.DataSource(sourcefile, plugin, dataformat)
        run, match = bids.get_matching_run(datasource, bidsmap_old)

        # If not, see if we can find a matching run in the template
        if not match:
            run, _ = bids.get_matching_run(datasource, template)

        # See if we have collected the run somewhere in our new bidsmap
        if not bids.exist_run(bidsmap_new, '', run):

            # Communicate with the user if the run was not present in bidsmap_old or in template, i.e. that we found a new sample
            if not match:
                LOGGER.info(
                    f"Discovered '{datasource.datatype}' {dataformat} sample: {sourcefile}"
                )

            # Now work from the provenance store
            if store:
                targetfile = store['target'] / sourcefile.relative_to(
                    store['source'])
                targetfile.parent.mkdir(parents=True, exist_ok=True)
                run['provenance'] = str(shutil.copy2(sourcefile, targetfile))

            # Copy the filled-in run over to the new bidsmap
            bids.append_run(bidsmap_new, run)

        else:
            # Communicate with the user if the run was already present in bidsmap_old or in template
            LOGGER.debug(
                f"Known '{datasource.datatype}' {dataformat} sample: {sourcefile}"
            )
Ejemplo n.º 2
0
def bidscoiner_plugin(session: Path, bidsmap: dict, bidsses: Path) -> None:
    """
    The bidscoiner plugin to convert the session DICOM and PAR/REC source-files into BIDS-valid nifti-files in the
    corresponding bids session-folder and extract personals (e.g. Age, Sex) from the source header

    :param session:     The full-path name of the subject/session source folder
    :param bidsmap:     The full mapping heuristics from the bidsmap YAML-file
    :param bidsses:     The full-path name of the BIDS output `sub-/ses-` folder
    :return:            Nothing
    """

    # Get the subject identifiers and the BIDS root folder from the bidsses folder
    if bidsses.name.startswith('ses-'):
        bidsfolder = bidsses.parent.parent
        subid = bidsses.parent.name
        sesid = bidsses.name
    else:
        bidsfolder = bidsses.parent
        subid = bidsses.name
        sesid = ''

    # Get started and see what dataformat we have
    options = bidsmap['Options']['plugins']['dcm2niix2bids']
    datasource = bids.get_datasource(session, {'dcm2niix2bids': options})
    dataformat = datasource.dataformat
    if not dataformat:
        LOGGER.info(f"No {__name__} sourcedata found in: {session}")
        return

    # Make a list of all the data sources / runs
    manufacturer = 'UNKNOWN'
    sources = []
    if dataformat == 'DICOM':
        sources = bidscoin.lsdirs(session)
        manufacturer = datasource.attributes('Manufacturer')
    elif dataformat == 'PAR':
        sources = bids.get_parfiles(session)
        manufacturer = 'Philips Medical Systems'
    else:
        LOGGER.exception(f"Unsupported dataformat '{dataformat}'")

    # Read or create a scans_table and tsv-file
    scans_tsv = bidsses / f"{subid}{bids.add_prefix('_',sesid)}_scans.tsv"
    if scans_tsv.is_file():
        scans_table = pd.read_csv(scans_tsv, sep='\t', index_col='filename')
    else:
        scans_table = pd.DataFrame(columns=['acq_time'], dtype='str')
        scans_table.index.name = 'filename'

    # Process all the source files or run subfolders
    sourcefile = Path()
    for source in sources:

        # Get a sourcefile
        if dataformat == 'DICOM':
            sourcefile = bids.get_dicomfile(source)
        elif dataformat == 'PAR':
            sourcefile = source
        if not sourcefile.name:
            continue

        # Get a matching run from the bidsmap
        datasource = bids.DataSource(sourcefile, {'dcm2niix2bids': options},
                                     dataformat)
        run, match = bids.get_matching_run(datasource, bidsmap, runtime=True)

        # Check if we should ignore this run
        if datasource.datatype in bidsmap['Options']['bidscoin'][
                'ignoretypes']:
            LOGGER.info(f"Leaving out: {source}")
            continue

        # Check if we already know this run
        if not match:
            LOGGER.error(
                f"Skipping unknown '{datasource.datatype}' run: {sourcefile}\n-> Re-run the bidsmapper and delete {bidsses} to solve this warning"
            )
            continue

        LOGGER.info(f"Processing: {source}")

        # Create the BIDS session/datatype output folder
        if run['bids']['suffix'] in bids.get_derivatives(datasource.datatype):
            outfolder = bidsfolder / 'derivatives' / manufacturer.replace(
                ' ', '') / subid / sesid / datasource.datatype
        else:
            outfolder = bidsses / datasource.datatype
        outfolder.mkdir(parents=True, exist_ok=True)

        # Compose the BIDS filename using the matched run
        bidsname = bids.get_bidsname(subid, sesid, run, runtime=True)
        runindex = run['bids'].get('run', '')
        if runindex.startswith('<<') and runindex.endswith('>>'):
            bidsname = bids.increment_runindex(outfolder, bidsname)
        jsonfiles = [
            (outfolder / bidsname).with_suffix('.json')
        ]  # List -> Collect the associated json-files (for updating them later) -- possibly > 1

        # Check if file already exists (-> e.g. when a static runindex is used)
        if (outfolder / bidsname).with_suffix('.json').is_file():
            LOGGER.warning(
                f"{outfolder/bidsname}.* already exists and will be deleted -- check your results carefully!"
            )
            for ext in ('.nii.gz', '.nii', '.json', '.tsv', '.tsv.gz', '.bval',
                        '.bvec'):
                (outfolder / bidsname).with_suffix(ext).unlink(missing_ok=True)

        # Convert physiological log files (dcm2niix can't handle these)
        if run['bids']['suffix'] == 'physio':
            if bids.get_dicomfile(
                    source, 2).name:  # TODO: issue warning or support PAR
                LOGGER.warning(
                    f"Found > 1 DICOM file in {source}, using: {sourcefile}")
            physiodata = physio.readphysio(sourcefile)
            physio.physio2tsv(physiodata, outfolder / bidsname)

        # Convert the source-files in the run folder to nifti's in the BIDS-folder
        else:
            command = '{command} {args} -f "{filename}" -o "{outfolder}" "{source}"'.format(
                command=options['command'],
                args=options.get('args', ''),
                filename=bidsname,
                outfolder=outfolder,
                source=source)
            if not bidscoin.run_command(command):
                if not list(outfolder.glob(f"{bidsname}.nii*")): continue
            if list(outfolder.glob(f"{bidsname}a.nii*")):
                LOGGER.warning(
                    f"Unexpected variants of {outfolder/bidsname}* were produced by dcm2niix. Possibly this can be remedied by using the dcm2niix -i option (to ignore derived, localizer and 2D images)"
                )

            # Replace uncropped output image with the cropped one
            if '-x y' in options.get('args', ''):
                for dcm2niixfile in sorted(
                        outfolder.glob(bidsname +
                                       '*_Crop_*')):  # e.g. *_Crop_1.nii.gz
                    ext = ''.join(dcm2niixfile.suffixes)
                    newbidsfile = str(dcm2niixfile).rsplit(ext, 1)[0].rsplit(
                        '_Crop_', 1)[0] + ext
                    LOGGER.info(
                        f"Found dcm2niix _Crop_ postfix, replacing original file\n{dcm2niixfile} ->\n{newbidsfile}"
                    )
                    dcm2niixfile.replace(newbidsfile)

            # Rename all files that got additional postfixes from dcm2niix. See: https://github.com/rordenlab/dcm2niix/blob/master/FILENAMING.md
            dcm2niixpostfixes = ('_c', '_i', '_Eq', '_real', '_imaginary',
                                 '_MoCo', '_t', '_Tilt', '_e', '_ph', '_ADC',
                                 '_fieldmaphz')
            dcm2niixfiles = sorted(
                set([
                    dcm2niixfile for dcm2niixpostfix in dcm2niixpostfixes
                    for dcm2niixfile in outfolder.glob(
                        f"{bidsname}*{dcm2niixpostfix}*.nii*")
                ]))
            if not jsonfiles[0].is_file(
            ) and dcm2niixfiles:  # Possibly renamed by dcm2niix, e.g. with multi-echo data (but not always for the first echo)
                jsonfiles.pop(0)
            for dcm2niixfile in dcm2niixfiles:
                ext = ''.join(dcm2niixfile.suffixes)
                postfixes = str(dcm2niixfile).split(bidsname)[1].rsplit(
                    ext)[0].split('_')[1:]
                newbidsname = dcm2niixfile.name  # Strip the additional postfixes and assign them to bids entities in the for-loop below
                for postfix in postfixes:  # dcm2niix postfixes _c%d, _e%d and _ph (and any combination of these in that order) are for multi-coil data, multi-echo data and phase data

                    # Patch the echo entity in the newbidsname with the dcm2niix echo info                      # NB: We can't rely on the bids-entity info here because manufacturers can e.g. put multiple echos in one series / run-folder
                    if 'echo' in run['bids'] and postfix.startswith('e'):
                        echonr = f"_{postfix}".replace('_e',
                                                       '')  # E.g. postfix='e1'
                        if not echonr:
                            echonr = '1'
                        if echonr.isnumeric():
                            newbidsname = bids.insert_bidskeyval(
                                newbidsname, 'echo', echonr.lstrip('0')
                            )  # In contrast to other labels, run and echo labels MUST be integers. Those labels MAY include zero padding, but this is NOT RECOMMENDED to maintain their uniqueness
                        else:
                            LOGGER.error(
                                f"Unexpected postix '{postfix}' found in {dcm2niixfile}"
                            )
                            newbidsname = bids.get_bidsvalue(
                                newbidsname, 'dummy', postfix
                            )  # Append the unknown postfix to the acq-label

                    # Patch the phase entity in the newbidsname with the dcm2niix mag/phase info
                    elif 'part' in run['bids'] and postfix in (
                            'ph', 'real', 'imaginary'
                    ):  # e.g. part: ['', 'mag', 'phase', 'real', 'imag', 0]
                        if postfix == 'ph':
                            newbidsname = bids.insert_bidskeyval(
                                newbidsname, 'part', 'phase')
                        if postfix == 'real':
                            newbidsname = bids.insert_bidskeyval(
                                newbidsname, 'part', 'real')
                        if postfix == 'imaginary':
                            newbidsname = bids.insert_bidskeyval(
                                newbidsname, 'part', 'imag')

                    # Patch fieldmap images (NB: datatype=='fmap' is too broad, see the fmap.yaml file)
                    elif run['bids']['suffix'] in bids.bidsdatatypes['fmap'][0][
                            'suffixes']:  # i.e. in ('magnitude','magnitude1','magnitude2','phase1','phase2','phasediff','fieldmap'). TODO: Make this robust for future BIDS versions
                        if len(dcm2niixfiles) not in (
                                1, 2, 3, 4
                        ):  # Phase / echo data may be stored in the same data source / run folder
                            LOGGER.debug(
                                f"Unknown fieldmap {outfolder/bidsname} for '{postfix}'"
                            )
                        newbidsname = newbidsname.replace(
                            '_magnitude1a', '_magnitude2'
                        )  # First catch this potential weird / rare case
                        newbidsname = newbidsname.replace(
                            '_magnitude1_pha', '_phase2'
                        )  # First catch this potential weird / rare case
                        newbidsname = newbidsname.replace(
                            '_magnitude1_e1', '_magnitude1'
                        )  # Case 2 = Two phase and magnitude images
                        newbidsname = newbidsname.replace(
                            '_magnitude1_e2', '_magnitude2'
                        )  # Case 2: This can happen when the e2 image is stored in the same directory as the e1 image, but with the e2 listed first
                        newbidsname = newbidsname.replace(
                            '_magnitude2_e1', '_magnitude1'
                        )  # Case 2: This can happen when the e2 image is stored in the same directory as the e1 image, but with the e2 listed first
                        newbidsname = newbidsname.replace(
                            '_magnitude2_e2', '_magnitude2')  # Case 2
                        if len(dcm2niixfiles) in (
                                2, 3
                        ):  # Case 1 = One or two magnitude + one phasediff image
                            newbidsname = newbidsname.replace(
                                '_magnitude1_ph', '_phasediff')
                            newbidsname = newbidsname.replace(
                                '_magnitude2_ph', '_phasediff')
                        newbidsname = newbidsname.replace(
                            '_phasediff_e1', '_phasediff')  # Case 1
                        newbidsname = newbidsname.replace(
                            '_phasediff_e2', '_phasediff')  # Case 1
                        newbidsname = newbidsname.replace(
                            '_phasediff_ph', '_phasediff')  # Case 1
                        newbidsname = newbidsname.replace(
                            '_magnitude1_ph', '_phase1'
                        )  # Case 2: One or two magnitude and phase images in one folder / datasource
                        newbidsname = newbidsname.replace(
                            '_magnitude2_ph', '_phase2'
                        )  # Case 2: Two magnitude + two phase images in one folder / datasource
                        newbidsname = newbidsname.replace(
                            '_phase1_e1', '_phase1')  # Case 2
                        newbidsname = newbidsname.replace(
                            '_phase1_e2', '_phase2'
                        )  # Case 2: This can happen when the e2 image is stored in the same directory as the e1 image, but with the e2 listed first
                        newbidsname = newbidsname.replace(
                            '_phase2_e1', '_phase1'
                        )  # Case 2: This can happen when the e2 image is stored in the same directory as the e1 image, but with the e2 listed first
                        newbidsname = newbidsname.replace(
                            '_phase2_e2', '_phase2')  # Case 2
                        newbidsname = newbidsname.replace(
                            '_phase1_ph', '_phase1'
                        )  # Case 2: One or two magnitude and phase images in one folder / datasource
                        newbidsname = newbidsname.replace(
                            '_phase2_ph', '_phase2'
                        )  # Case 2: Two magnitude + two phase images in one folder / datasource
                        newbidsname = newbidsname.replace(
                            '_magnitude_e1', '_magnitude'
                        )  # Case 3 = One magnitude + one fieldmap image
                        if len(dcm2niixfiles) == 2:
                            newbidsname = newbidsname.replace(
                                '_fieldmap_e1', '_magnitude'
                            )  # Case 3: One magnitude + one fieldmap image in one folder / datasource
                        newbidsname = newbidsname.replace(
                            '_fieldmap_e1', '_fieldmap')  # Case 3
                        newbidsname = newbidsname.replace(
                            '_magnitude_ph', '_fieldmap'
                        )  # Case 3: One magnitude + one fieldmap image in one folder / datasource
                        newbidsname = newbidsname.replace(
                            '_fieldmap_ph', '_fieldmap')  # Case 3

                    # Append the dcm2niix info to acq-label, may need to be improved / elaborated for future BIDS standards, supporting multi-coil data
                    else:
                        newbidsname = bids.get_bidsvalue(
                            newbidsname, 'dummy', postfix)

                    # Remove the added postfix from the new bidsname
                    newbidsname = newbidsname.replace(f"_{postfix}_",
                                                      '_')  # If it is not last
                    newbidsname = newbidsname.replace(f"_{postfix}.",
                                                      '.')  # If it is last

                    # The ADC images are not BIDS compliant
                    if postfix == 'ADC':
                        LOGGER.warning(
                            f"The {newbidsname} image is most likely not BIDS-compliant -- you can probably delete it safely and update the scants.tsv file"
                        )

                # Save the nifti file with a new name
                if runindex.startswith('<<') and runindex.endswith('>>'):
                    newbidsname = bids.increment_runindex(
                        outfolder, newbidsname, ''
                    )  # Update the runindex now that the acq-label has changed
                newbidsfile = outfolder / newbidsname
                LOGGER.info(
                    f"Found dcm2niix {postfixes} postfixes, renaming\n{dcm2niixfile} ->\n{newbidsfile}"
                )
                if newbidsfile.is_file():
                    LOGGER.warning(
                        f"Overwriting existing {newbidsfile} file -- check your results carefully!"
                    )
                dcm2niixfile.replace(newbidsfile)

                # Rename all associated files (i.e. the json-, bval- and bvec-files)
                oldjsonfile = dcm2niixfile.with_suffix('').with_suffix('.json')
                newjsonfile = newbidsfile.with_suffix('').with_suffix('.json')
                if not oldjsonfile.is_file():
                    LOGGER.warning(
                        f"Unexpected file conversion result: {oldjsonfile} not found"
                    )
                else:
                    if oldjsonfile in jsonfiles:
                        jsonfiles.remove(oldjsonfile)
                    if newjsonfile not in jsonfiles:
                        jsonfiles.append(newjsonfile)
                for oldfile in outfolder.glob(
                        dcm2niixfile.with_suffix('').stem + '.*'):
                    oldfile.replace(
                        newjsonfile.with_suffix(''.join(oldfile.suffixes)))

        # Copy over the source meta-data
        metadata = bids.copymetadata(sourcefile, outfolder / bidsname,
                                     options.get('meta', []))

        # Loop over and adapt all the newly produced json sidecar-files and write to the scans.tsv file (NB: assumes every nifti-file comes with a json-file)
        for jsonfile in sorted(set(jsonfiles)):

            # Load the json meta-data
            with jsonfile.open('r') as json_fid:
                jsondata = json.load(json_fid)

            # Add all the source meta data to the meta-data
            for metakey, metaval in metadata.items():
                if jsondata.get(metakey) == metaval:
                    LOGGER.warning(
                        f"Replacing {metakey} values in {jsonfile}: {jsondata[metakey]} -> {metaval}"
                    )
                jsondata[metakey] = metaval

            # Add all the run meta data to the meta-data. NB: the dynamic `IntendedFor` value is handled separately later
            for metakey, metaval in run['meta'].items():
                if metakey != 'IntendedFor':
                    metaval = datasource.dynamicvalue(metaval,
                                                      cleanup=False,
                                                      runtime=True)
                    try:
                        metaval = ast.literal_eval(str(metaval))
                    except (ValueError, SyntaxError):
                        pass
                    LOGGER.info(
                        f"Adding '{metakey}: {metaval}' to: {jsonfile}")
                if not metaval:
                    metaval = None
                jsondata[metakey] = metaval

            # Remove unused (but added from the template) B0FieldIdentifiers/Sources
            if not jsondata.get('B0FieldSource'):
                jsondata.pop('B0FieldSource', None)
            if not jsondata.get('B0FieldIdentifier'):
                jsondata.pop('B0FieldIdentifier', None)

            # Save the meta-data to the json sidecar-file
            with jsonfile.open('w') as json_fid:
                json.dump(jsondata, json_fid, indent=4)

            # Parse the acquisition time from the source header or else from the json file (NB: assuming the source file represents the first acquisition)
            outputfile = [
                file for file in jsonfile.parent.glob(jsonfile.stem + '.*')
                if file.suffix in ('.nii', '.gz')
            ]  # Find the corresponding nifti/tsv.gz file (there should be only one, let's not make assumptions about the .gz extension)
            if not outputfile:
                LOGGER.exception(
                    f"No data-file found with {jsonfile} when updating {scans_tsv}"
                )
            elif datasource.datatype not in bidsmap['Options']['bidscoin'][
                    'bidsignore'] and not run['bids'][
                        'suffix'] in bids.get_derivatives(datasource.datatype):
                acq_time = ''
                if dataformat == 'DICOM':
                    acq_time = f"{datasource.attributes('AcquisitionDate')}T{datasource.attributes('AcquisitionTime')}"
                elif dataformat == 'PAR':
                    acq_time = datasource.attributes('exam_date')
                if not acq_time or acq_time == 'T':
                    acq_time = f"1925-01-01T{jsondata.get('AcquisitionTime','')}"
                try:
                    acq_time = dateutil.parser.parse(acq_time)
                    if options.get('anon', 'y') in ('y', 'yes'):
                        acq_time = acq_time.replace(
                            year=1925, month=1, day=1
                        )  # Privacy protection (see BIDS specification)
                    acq_time = acq_time.isoformat()
                except Exception as jsonerror:
                    LOGGER.warning(
                        f"Could not parse the acquisition time from: {sourcefile}\n{jsonerror}"
                    )
                    acq_time = 'n/a'
                scanpath = outputfile[0].relative_to(bidsses)
                scans_table.loc[scanpath.as_posix(), 'acq_time'] = acq_time

    # Write the scans_table to disk
    LOGGER.info(f"Writing acquisition time data to: {scans_tsv}")
    scans_table.sort_values(by=['acq_time', 'filename'], inplace=True)
    scans_table.replace('', 'n/a').to_csv(scans_tsv,
                                          sep='\t',
                                          encoding='utf-8',
                                          na_rep='n/a')

    # Collect personal data from a source header (PAR/XML does not contain personal info)
    personals = {}
    if sesid and 'session_id' not in personals:
        personals['session_id'] = sesid
    personals['age'] = ''
    if dataformat == 'DICOM':
        age = datasource.attributes(
            'PatientAge'
        )  # A string of characters with one of the following formats: nnnD, nnnW, nnnM, nnnY
        if age.endswith('D'): age = float(age.rstrip('D')) / 365.2524
        elif age.endswith('W'): age = float(age.rstrip('W')) / 52.1775
        elif age.endswith('M'): age = float(age.rstrip('M')) / 12
        elif age.endswith('Y'): age = float(age.rstrip('Y'))
        if age:
            if options.get('anon', 'y') in ('y', 'yes'):
                age = int(float(age))
            personals['age'] = str(age)
        personals['sex'] = datasource.attributes('PatientSex')
        personals['size'] = datasource.attributes('PatientSize')
        personals['weight'] = datasource.attributes('PatientWeight')

    # Store the collected personals in the participants_table
    participants_tsv = bidsfolder / 'participants.tsv'
    if participants_tsv.is_file():
        participants_table = pd.read_csv(participants_tsv, sep='\t', dtype=str)
        participants_table.set_index(['participant_id'],
                                     verify_integrity=True,
                                     inplace=True)
    else:
        participants_table = pd.DataFrame()
        participants_table.index.name = 'participant_id'
    if subid in participants_table.index and 'session_id' in participants_table.keys(
    ) and participants_table.loc[subid, 'session_id']:
        return  # Only take data from the first session -> BIDS specification
    for key in personals:  # TODO: Check that only values that are consistent over sessions go in the participants.tsv file, otherwise put them in a sessions.tsv file
        if key not in participants_table or participants_table[key].isnull(
        ).get(subid, True) or participants_table[key].get(subid) == 'n/a':
            participants_table.loc[subid, key] = personals[key]

    # Write the collected data to the participants tsv-file
    LOGGER.info(f"Writing {subid} subject data to: {participants_tsv}")
    participants_table.replace('', 'n/a').to_csv(participants_tsv,
                                                 sep='\t',
                                                 encoding='utf-8',
                                                 na_rep='n/a')
Ejemplo n.º 3
0
def coin_data2bids(dataformat: str, session: Path, bidsmap: dict,
                   bidsfolder: Path, personals: dict, subprefix: str,
                   sesprefix: str) -> None:
    """
    Converts the session source-files into BIDS-valid nifti-files in the corresponding bidsfolder and
    extracts personals (e.g. Age, Sex) from the source header

    :param dataformat:  The format of the raw input data that is to be coined (e.g. 'DICOM' or 'PAR', see bids.get_dataformat)
    :param session:     The full-path name of the subject/session source file/folder
    :param bidsmap:     The full mapping heuristics from the bidsmap YAML-file
    :param bidsfolder:  The full-path name of the BIDS root-folder
    :param personals:   The dictionary with the personal information
    :param subprefix:   The prefix common for all source subject-folders
    :param sesprefix:   The prefix common for all source session-folders
    :return:            Nothing
    """

    # Get valid BIDS subject/session identifiers from the (first) DICOM- or PAR/XML source file
    if dataformat == 'DICOM':
        sourcefile = Path()
        sources = bids.lsdirs(session)
        for source in sources:
            sourcefile = bids.get_dicomfile(source)
            manufacturer = bids.get_dicomfield('Manufacturer', sourcefile)
            if sourcefile.name:
                break

    elif dataformat == 'PAR':
        sources = bids.get_parfiles(session)
        manufacturer = 'Philips Medical Systems'
        if sources:
            sourcefile = sources[0]

    else:
        LOGGER.error(
            f"Unsupported data format: {dataformat}\nPlease report this bug")
        return

    if not sources:
        LOGGER.info(f"No data found for: {session}")
        return

    subid, sesid = bids.get_subid_sesid(sourcefile,
                                        bidsmap[dataformat]['subject'],
                                        bidsmap[dataformat]['session'],
                                        subprefix, sesprefix)

    if subid == subprefix:
        LOGGER.error(f"No valid subject identifier found for: {session}")
        return

    # Create the BIDS session-folder and a scans.tsv file
    bidsses = bidsfolder / subid / sesid
    if bidsses.is_dir():
        LOGGER.warning(
            f"Existing BIDS output-directory found, which may result in duplicate data (with increased run-index). Make sure {bidsses} was cleaned-up from old data before (re)running the bidscoiner"
        )
    bidsses.mkdir(parents=True, exist_ok=True)
    scans_tsv = bidsses / f"{subid}{bids.add_prefix('_',sesid)}_scans.tsv"
    if scans_tsv.is_file():
        scans_table = pd.read_csv(scans_tsv, sep='\t', index_col='filename')
    else:
        scans_table = pd.DataFrame(columns=['acq_time'], dtype='str')
        scans_table.index.name = 'filename'

    # Process all the source files or run subfolders
    for source in sources:

        # Get a source-file
        if dataformat == 'DICOM':
            sourcefile = bids.get_dicomfile(source)
        elif dataformat == 'PAR':
            sourcefile = source
        if not sourcefile.name:
            continue

        # Get a matching run from the bidsmap
        run, datatype, index = bids.get_matching_run(sourcefile, bidsmap,
                                                     dataformat)

        # Check if we should ignore this run
        if datatype == bids.ignoredatatype:
            LOGGER.info(f"Leaving out: {source}")
            continue

        # Check if we already know this run
        if index is None:
            LOGGER.error(
                f"Skipping unknown '{datatype}' run: {sourcefile}\n-> Re-run the bidsmapper and delete {bidsses} to solve this warning"
            )
            continue

        LOGGER.info(f"Processing: {source}")

        # Create the BIDS session/datatype output folder
        if run['bids']['suffix'] in bids.get_derivatives(datatype):
            outfolder = bidsfolder / 'derivatives' / manufacturer.replace(
                ' ', '') / subid / sesid / datatype
        else:
            outfolder = bidsses / datatype
        outfolder.mkdir(parents=True, exist_ok=True)

        # Compose the BIDS filename using the matched run
        bidsname = bids.get_bidsname(subid, sesid, run)
        runindex = run['bids'].get('run', '')
        if runindex.startswith('<<') and runindex.endswith('>>'):
            bidsname = bids.increment_runindex(outfolder, bidsname)
        jsonfiles = [
            (outfolder / bidsname).with_suffix('.json')
        ]  # List -> Collect the associated json-files (for updating them later) -- possibly > 1

        # Check if file already exists (-> e.g. when a static runindex is used)
        if (outfolder / bidsname).with_suffix('.json').is_file():
            LOGGER.warning(
                f"{outfolder/bidsname}.* already exists and will be deleted -- check your results carefully!"
            )
            for ext in ('.nii.gz', '.nii', '.json', '.bval', '.bvec',
                        'tsv.gz'):
                (outfolder / bidsname).with_suffix(ext).unlink(missing_ok=True)

        # Convert physiological log files (dcm2niix can't handle these)
        if run['bids']['suffix'] == 'physio':
            if bids.get_dicomfile(source, 2).name:
                LOGGER.warning(
                    f"Found > 1 DICOM file in {source}, using: {sourcefile}")
            physiodata = physio.readphysio(sourcefile)
            physio.physio2tsv(physiodata, outfolder / bidsname)

        # Convert the source-files in the run folder to nifti's in the BIDS-folder
        else:
            command = '{path}dcm2niix {args} -f "{filename}" -o "{outfolder}" "{source}"'.format(
                path=bidsmap['Options']['dcm2niix']['path'],
                args=bidsmap['Options']['dcm2niix']['args'],
                filename=bidsname,
                outfolder=outfolder,
                source=source)
            if not bids.run_command(command):
                continue

            # Replace uncropped output image with the cropped one
            if '-x y' in bidsmap['Options']['dcm2niix']['args']:
                for dcm2niixfile in sorted(
                        outfolder.glob(bidsname +
                                       '*_Crop_*')):  # e.g. *_Crop_1.nii.gz
                    ext = ''.join(dcm2niixfile.suffixes)
                    newbidsfile = str(dcm2niixfile).rsplit(ext, 1)[0].rsplit(
                        '_Crop_', 1)[0] + ext
                    LOGGER.info(
                        f"Found dcm2niix _Crop_ postfix, replacing original file\n{dcm2niixfile} ->\n{newbidsfile}"
                    )
                    dcm2niixfile.replace(newbidsfile)

            # Rename all files that got additional postfixes from dcm2niix. See: https://github.com/rordenlab/dcm2niix/blob/master/FILENAMING.md
            dcm2niixpostfixes = ('_c', '_i', '_Eq', '_real', '_imaginary',
                                 '_MoCo', '_t', '_Tilt', '_e', '_ph')
            dcm2niixfiles = sorted(
                set([
                    dcm2niixfile for dcm2niixpostfix in dcm2niixpostfixes
                    for dcm2niixfile in outfolder.glob(
                        f"{bidsname}*{dcm2niixpostfix}*")
                ]))
            for dcm2niixfile in dcm2niixfiles:
                ext = ''.join(dcm2niixfile.suffixes)
                postfixes = str(dcm2niixfile).split(bidsname)[1].rsplit(
                    ext)[0].split('_')[1:]
                newbidsname = dcm2niixfile.name  # Strip the additional postfixes and assign them to bids entities in the for-loop below
                for postfix in postfixes:  # dcm2niix postfixes _c%d, _e%d and _ph (and any combination of these in that order) are for multi-coil data, multi-echo data and phase data

                    # Patch the echo entity in the newbidsname with the dcm2niix echo info                      # NB: We can't rely on the bids-entity info here because manufacturers can e.g. put multiple echos in one series / run-folder
                    if postfix[0] == 'e' and bids.get_bidsvalue(
                            newbidsname, 'echo'
                    ):  # NB: Check if postfix[0]=='e' uniquely refers to the right dcm2niixpostfix
                        echonr = f"_{postfix}"  # E.g. echonr='_e1' or echonr='_pha'
                        for dcm2niixpostfix in dcm2niixpostfixes:
                            echonr = echonr.replace(
                                dcm2niixpostfix, ''
                            )  # Strip the dcm2niixpostfix to keep the echonr info. E.g. [echonr='_e1' or echonr='_pha'] -> [echonr='1' or echonr='a']
                        if echonr.isalpha():
                            echonr = ord(
                                echonr
                            ) - 95  # dcm2niix adds an alphabetically ordered character if it outputs more than one image with the same name. Convert character to echo-number: '' -> 1, 'a'->2, etc
                        elif not echonr:
                            echonr = 1
                        newbidsname = bids.get_bidsvalue(
                            newbidsname, 'echo', str(echonr)
                        )  # In contrast to other labels, run and echo labels MUST be integers. Those labels MAY include zero padding, but this is NOT RECOMMENDED to maintain their uniqueness

                    # Patch fieldmap images (NB: datatype=='fmap' is too broad, see the fmap.yaml file)
                    elif run['bids']['suffix'] in ('magnitude', 'magnitude1',
                                                   'magnitude2', 'phase1',
                                                   'phase2', 'phasediff',
                                                   'fieldmap'):
                        if len(dcm2niixfiles) not in (
                                0, 2, 4, 6, 8
                        ):  # Phase / echo data may be stored in the same data source / run folder
                            LOGGER.warning(
                                f"Unknown fieldmap {outfolder/bidsname} for '{postfix}'"
                            )
                        newbidsname = newbidsname.replace(
                            '_fieldmap_ph', '_fieldmap')
                        newbidsname = newbidsname.replace(
                            '_magnitude_e1', '_magnitude')
                        newbidsname = newbidsname.replace(
                            '_magnitude_ph', '_fieldmap')
                        newbidsname = newbidsname.replace(
                            '_magnitude1_e1', '_magnitude1')
                        newbidsname = newbidsname.replace(
                            '_magnitude2_e1', '_magnitude1'
                        )  # This can happen when the e2 image is stored in the same directory as the e1 image, but with the e2 listed first
                        newbidsname = newbidsname.replace(
                            '_magnitude1_e2', '_magnitude2')
                        if len(dcm2niixfiles) == 8:
                            newbidsname = newbidsname.replace(
                                '_magnitude1_ph', '_phase1'
                            )  # Two magnitude + 2 phase images in one folder / datasource
                        else:
                            newbidsname = newbidsname.replace(
                                '_magnitude1_ph', '_phasediff'
                            )  # One or two magnitude + 1 phasediff image
                        newbidsname = newbidsname.replace(
                            '_magnitude1a', '_magnitude2')
                        newbidsname = newbidsname.replace(
                            '_magnitude1_pha', '_phase2')
                        newbidsname = newbidsname.replace(
                            '_magnitude2_e2', '_magnitude2')
                        newbidsname = newbidsname.replace(
                            '_magnitude2_ph', '_phase2')
                        newbidsname = newbidsname.replace(
                            '_phase1_e1', '_phase1')
                        newbidsname = newbidsname.replace(
                            '_phase2_e1', '_phase1'
                        )  # This can happen when the e2 image is stored in the same directory as the e1 image, but with the e2 listed first
                        newbidsname = newbidsname.replace(
                            '_phase1_ph', '_phase1')
                        newbidsname = newbidsname.replace(
                            '_phase1_e2', '_phase2')
                        newbidsname = newbidsname.replace(
                            '_phase2_e2', '_phase2')
                        newbidsname = newbidsname.replace(
                            '_phase2_ph', '_phase2')

                    # Append the dcm2niix info to acq-label, may need to be improved / elaborated for future BIDS standards, supporting multi-coil data
                    else:
                        newbidsname = bids.get_bidsvalue(
                            newbidsname, 'dummy', postfix)

                    # Remove the added postfix from the new bidsname
                    newbidsname = newbidsname.replace(f"_{postfix}_",
                                                      '_')  # If it is not last
                    newbidsname = newbidsname.replace(f"_{postfix}.",
                                                      '.')  # If it is last

                # Save the file with a new name
                if runindex.startswith('<<') and runindex.endswith('>>'):
                    newbidsname = bids.increment_runindex(
                        outfolder, newbidsname, ''
                    )  # Update the runindex now that the acq-label has changed
                newbidsfile = outfolder / newbidsname
                LOGGER.info(
                    f"Found dcm2niix {postfixes} postfixes, renaming\n{dcm2niixfile} ->\n{newbidsfile}"
                )
                if newbidsfile.is_file():
                    LOGGER.warning(
                        f"Overwriting existing {newbidsfile} file -- check your results carefully!"
                    )
                dcm2niixfile.replace(newbidsfile)
                if ext == '.json':
                    oldjsonfile = (outfolder / bidsname).with_suffix('.json')
                    if oldjsonfile in jsonfiles and not oldjsonfile.is_file():
                        jsonfiles.remove(
                            (outfolder / bidsname).with_suffix('.json'))
                    jsonfiles.append(newbidsfile)

        # Loop over and adapt all the newly produced json files and write to the scans.tsv file (every nifti-file comes with a json-file)
        for jsonfile in sorted(set(jsonfiles)):

            # Check if dcm2niix behaved as expected
            if not jsonfile.is_file():
                LOGGER.error(
                    f"Unexpected file conversion result: {jsonfile} not found")
                continue

            # Add a dummy b0 bval- and bvec-file for any file without a bval/bvec file (e.g. sbref, b0 scans)
            if datatype == 'dwi':
                bvecfile = jsonfile.with_suffix('.bvec')
                bvalfile = jsonfile.with_suffix('.bval')
                if not bvecfile.is_file():
                    LOGGER.info(f"Adding dummy bvec file: {bvecfile}")
                    with bvecfile.open('w') as bvec_fid:
                        bvec_fid.write('0\n0\n0\n')
                if not bvalfile.is_file():
                    LOGGER.info(f"Adding dummy bval file: {bvalfile}")
                    with bvalfile.open('w') as bval_fid:
                        bval_fid.write('0\n')

            # Add the TaskName to the func json-file
            elif datatype == 'func':
                with jsonfile.open('r') as json_fid:
                    data = json.load(json_fid)
                if not 'TaskName' in data:
                    LOGGER.info(f"Adding TaskName to: {jsonfile}")
                    data['TaskName'] = run['bids']['task']
                    with jsonfile.open('w') as json_fid:
                        json.dump(data, json_fid, indent=4)

            # Parse the acquisition time from the json file or else from the source header (NB: assuming the source file represents the first acquisition)
            niifile = list(
                jsonfile.parent.glob(jsonfile.stem + '.nii*')
            )  # Find the corresponding nifti file (there should be only one, let's not make assumptions about the .gz extension)
            if niifile and datatype not in bidsmap['Options']['bidscoin'][
                    'bidsignore'] and not run['bids'][
                        'suffix'] in bids.get_derivatives(datatype):
                with jsonfile.open('r') as json_fid:
                    data = json.load(json_fid)
                if 'AcquisitionTime' not in data or not data['AcquisitionTime']:
                    data['AcquisitionTime'] = bids.get_sourcefield(
                        'AcquisitionTime', sourcefile)  # DICOM
                if not data['AcquisitionTime']:
                    data['AcquisitionTime'] = bids.get_sourcefield(
                        'exam_date', sourcefile)  # PAR/XML
                try:
                    acq_time = dateutil.parser.parse(data['AcquisitionTime'])
                except:
                    LOGGER.warning(
                        f"Could not parse the acquisition time from: '{data['AcquisitionTime']}' in {sourcefile}"
                    )
                    acq_time = dateutil.parser.parse('00:00:00')
                scanpath = niifile[0].relative_to(bidsses)
                scans_table.loc[
                    scanpath.as_posix(),
                    'acq_time'] = '1925-01-01T' + acq_time.strftime('%H:%M:%S')

    # Write the scans_table to disk
    LOGGER.info(f"Writing acquisition time data to: {scans_tsv}")
    scans_table.sort_values(by=['acq_time', 'filename'], inplace=True)
    scans_table.to_csv(scans_tsv, sep='\t', encoding='utf-8')

    # Add IntendedFor and TE1+TE2 meta-data to the fieldmap json-files. This has been postponed untill all datatypes have been processed (i.e. so that all target images are indeed on disk)
    if bidsmap[dataformat]['fmap'] is not None:
        for fieldmap in bidsmap[dataformat]['fmap']:
            bidsname = bids.get_bidsname(subid, sesid, fieldmap)
            niifiles = []
            intendedfor = fieldmap['bids']['IntendedFor']

            # Search for the imaging files that match the IntendedFor search criteria
            if intendedfor:
                if intendedfor.startswith('<<') and intendedfor.endswith('>>'):
                    intendedfor = intendedfor[2:-2].split('><')
                elif not isinstance(intendedfor, list):
                    intendedfor = [intendedfor]
                for selector in intendedfor:
                    niifiles.extend(
                        [
                            Path(niifile).relative_to(bidsfolder / subid)
                            for niifile in sorted(
                                bidsses.rglob(f"*{selector}*.nii*"))
                            if selector
                        ]
                    )  # Search in all runs using a relative path to the subject folder
            else:
                intendedfor = []

            # Get the set of json-files (account for multiple runs in one data source and dcm2niix postfixes inserted into the acquisition label)
            jsonfiles = []
            acqlabel = bids.get_bidsvalue(bidsname, 'acq')
            patterns = (bidsname.replace('_run-1_', '_run-[0-9]*_').replace(
                '_magnitude1',
                '_magnitude*').replace('_magnitude2', '_magnitude*').replace(
                    '_phase1', '_phase*').replace('_phase2', '_phase*'),
                        bidsname.replace('_run-1_', '_run-[0-9]*_').replace(
                            '_magnitude1',
                            '_phase*').replace('_magnitude2', '_phase*'))
            for pattern in patterns:
                jsonfiles.extend((bidsses / 'fmap').glob(pattern + '.json'))
                if acqlabel:
                    cepattern = bids.get_bidsvalue(pattern, 'acq',
                                                   acqlabel + '[CE][0-9]*')
                    jsonfiles.extend(
                        list((bidsses / 'fmap').glob(cepattern + '.json')))

            # Save the meta-data in the jsonfiles
            for jsonfile in sorted(set(jsonfiles)):

                # Add the IntendedFor data
                with jsonfile.open('r') as json_fid:
                    data = json.load(json_fid)
                if 'IntendedFor' not in data:
                    if niifiles:
                        LOGGER.info(f"Adding IntendedFor to: {jsonfile}")
                    elif intendedfor:
                        LOGGER.warning(
                            f"Empty 'IntendedFor' fieldmap value in {jsonfile}: the search for {intendedfor} gave no results"
                        )
                    else:
                        LOGGER.warning(
                            f"Empty 'IntendedFor' fieldmap value in {jsonfile}: the IntendedFor value of the bidsmap entry was empty"
                        )
                    data['IntendedFor'] = [
                        niifile.as_posix() for niifile in niifiles
                    ]  # The path needs to use forward slashes instead of backward slashes
                    with jsonfile.open('w') as json_fid:
                        json.dump(data, json_fid, indent=4)

                # Extract the echo times from magnitude1 and magnitude2 and add them to the phasediff json-file
                if jsonfile.name.endswith('phasediff.json'):
                    json_magnitude = [None, None]
                    TE = [None, None]
                    for n in (0, 1):
                        json_magnitude[
                            n] = jsonfile.parent / jsonfile.name.replace(
                                '_phasediff', f"_magnitude{n+1}")
                        if not json_magnitude[n].is_file():
                            LOGGER.error(
                                f"Could not find expected magnitude{n+1} image associated with: {jsonfile}"
                            )
                        else:
                            with json_magnitude[n].open('r') as json_fid:
                                data = json.load(json_fid)
                            TE[n] = data['EchoTime']
                    if None in TE:
                        LOGGER.error(
                            f"Cannot find and add valid EchoTime1={TE[0]} and EchoTime2={TE[1]} data to: {jsonfile}"
                        )
                    elif TE[0] > TE[1]:
                        LOGGER.error(
                            f"Found invalid EchoTime1={TE[0]} > EchoTime2={TE[1]} for: {jsonfile}"
                        )
                    else:
                        with jsonfile.open('r') as json_fid:
                            data = json.load(json_fid)
                        data['EchoTime1'] = TE[0]
                        data['EchoTime2'] = TE[1]
                        LOGGER.info(
                            f"Adding EchoTime1: {TE[0]} and EchoTime2: {TE[1]} to {jsonfile}"
                        )
                        with jsonfile.open('w') as json_fid:
                            json.dump(data, json_fid, indent=4)

    # Collect personal data from a source header (PAR/XML does not contain personal info)
    if dataformat == 'DICOM' and sourcefile.name:
        personals['participant_id'] = subid
        if sesid:
            if 'session_id' not in personals:
                personals['session_id'] = sesid
            else:
                return  # Only take data from the first session -> BIDS specification
        age = bids.get_dicomfield(
            'PatientAge', sourcefile
        )  # A string of characters with one of the following formats: nnnD, nnnW, nnnM, nnnY
        if age.endswith('D'):
            personals['age'] = str(int(float(age.rstrip('D')) / 365.2524))
        elif age.endswith('W'):
            personals['age'] = str(int(float(age.rstrip('W')) / 52.1775))
        elif age.endswith('M'):
            personals['age'] = str(int(float(age.rstrip('M')) / 12))
        elif age.endswith('Y'):
            personals['age'] = str(int(float(age.rstrip('Y'))))
        elif age:
            personals['age'] = age
        personals['sex'] = bids.get_dicomfield('PatientSex', sourcefile)
        personals['size'] = bids.get_dicomfield('PatientSize', sourcefile)
        personals['weight'] = bids.get_dicomfield('PatientWeight', sourcefile)
Ejemplo n.º 4
0
def coin_data2bids(dataformat: str, session: Path, bidsmap: dict, bidsfolder: Path, personals: dict, subprefix: str, sesprefix: str) -> None:
    """
    Converts the session source-files into BIDS-valid nifti-files in the corresponding bidsfolder and
    extracts personals (e.g. Age, Sex) from the source header

    :param dataformat:  The format of the raw input data that is to be coined (e.g. 'DICOM' or 'PAR', see bids.get_dataformat)
    :param session:     The full-path name of the subject/session source file/folder
    :param bidsmap:     The full mapping heuristics from the bidsmap YAML-file
    :param bidsfolder:  The full-path name of the BIDS root-folder
    :param personals:   The dictionary with the personal information
    :param subprefix:   The prefix common for all source subject-folders
    :param sesprefix:   The prefix common for all source session-folders
    :return:            Nothing
    """

    # Get valid BIDS subject/session identifiers from the (first) DICOM- or PAR/XML source file
    if dataformat=='DICOM':
        sourcefile = Path()
        sources    = bids.lsdirs(session)
        for source in sources:
            sourcefile = bids.get_dicomfile(source)
            if sourcefile.name:
                break

    elif dataformat=='PAR':
        sources = bids.get_parfiles(session)
        if sources:
            sourcefile = sources[0]

    else:
        LOGGER.error(f"Unsupported data format: {dataformat}\nPlease report this bug")
        return

    if not sources:
        LOGGER.info(f"No data found for: {session}")
        return

    subid, sesid = bids.get_subid_sesid(sourcefile,
                                        bidsmap[dataformat]['subject'],
                                        bidsmap[dataformat]['session'],
                                        subprefix, sesprefix)

    if subid == subprefix:
        LOGGER.error(f"No valid subject identifier found for: {session}")
        return

    # Create the BIDS session-folder and a scans.tsv file
    bidsses = bidsfolder/subid/sesid
    if bidsses.is_dir():
        LOGGER.warning(f"Existing BIDS output-directory found, which may result in duplicate data (with increased run-index). Make sure {bidsses} was cleaned-up from old data before (re)running the bidscoiner")
    bidsses.mkdir(parents=True, exist_ok=True)
    scans_tsv = bidsses/f"{subid}{bids.add_prefix('_',sesid)}_scans.tsv"
    if scans_tsv.is_file():
        scans_table = pd.read_csv(scans_tsv, sep='\t', index_col='filename')
    else:
        scans_table = pd.DataFrame(columns=['acq_time'], dtype='str')
        scans_table.index.name = 'filename'

    # Process all the source files or run subfolders
    for source in sources:

        # Get a source-file
        if dataformat=='DICOM':
            sourcefile = bids.get_dicomfile(source)
        elif dataformat=='PAR':
            sourcefile = source
        if not sourcefile.name:
            continue

        # Get a matching run from the bidsmap
        run, modality, index = bids.get_matching_run(sourcefile, bidsmap, dataformat)

        # Check if we should ignore this run
        if modality == bids.ignoremodality:
            LOGGER.info(f"Leaving out: {source}")
            continue

        # Check if we already know this run
        if index is None:
            LOGGER.error(f"Skipping unknown '{modality}' run: {sourcefile}\n-> Re-run the bidsmapper and delete {bidsses} to solve this warning")
            continue

        LOGGER.info(f"Processing: {source}")

        # Create the BIDS session/modality folder
        bidsmodality = bidsses/modality
        bidsmodality.mkdir(parents=True, exist_ok=True)

        # Compose the BIDS filename using the matched run
        bidsname = bids.get_bidsname(subid, sesid, modality, run)
        runindex = run['bids']['run']
        if runindex.startswith('<<') and runindex.endswith('>>'):
            bidsname = bids.increment_runindex(bidsmodality, bidsname)

        # Check if file already exists (-> e.g. when a static runindex is used). TODO: Future dcm2niix versions may contain a `-w 1` option: https://github.com/rordenlab/dcm2niix/issues/276
        if (bidsmodality/bidsname).with_suffix('.json').is_file():
            LOGGER.warning(f"{bidsmodality/bidsname}.* already exists and will be deleted -- check your results carefully!")
            for ext in ('.nii.gz', '.nii', '.json', '.bval', '.bvec'):
                (bidsmodality/bidsname).with_suffix(ext).unlink(missing_ok=True)

        # Convert the source-files in the run folder to nifti's in the BIDS-folder
        command = '{path}dcm2niix {args} -f "{filename}" -o "{outfolder}" "{source}"'.format(
            path      = bidsmap['Options']['dcm2niix']['path'],
            args      = bidsmap['Options']['dcm2niix']['args'],
            filename  = bidsname,
            outfolder = bidsmodality,
            source    = source)
        if not bids.run_command(command):
            continue

        # Replace uncropped output image with the cropped one
        if '-x y' in bidsmap['Options']['dcm2niix']['args']:
            for filename in sorted(bidsmodality.glob(bidsname + '*_Crop_*')):                                   # e.g. *_Crop_1.nii.gz
                ext         = ''.join(filename.suffixes)
                newfilename = str(filename).rsplit(ext,1)[0].rsplit('_Crop_',1)[0] + ext
                LOGGER.info(f"Found dcm2niix _Crop_ suffix, replacing original file\n{filename} ->\n{newfilename}")
                filename.replace(newfilename)

        # Rename all files ending with _c%d, _e%d and _ph (and any combination of these) that are added by dcm2niix for multi-coil data, multi-echo data and phase data
        # See: https://github.com/rordenlab/dcm2niix/blob/master/FILENAMING.md
        jsonfiles = []                                                                                          # Collect the associated json-files (for updating them later) -- possibly > 1
        for dcm2niisuffix in ('_c', '_e', '_ph', '_i', '_Eq', '_real', '_imaginary', '_MoCo', '_t', '_Tilt'):
            for filename in sorted(bidsmodality.glob(f"{bidsname}*{dcm2niisuffix}*")):
                ext             = ''.join(filename.suffixes)
                basepath, index = str(filename).rsplit(ext)[0].rsplit(dcm2niisuffix,1)                          # basepath = the name without the added stuff (i.e. bidsmodality/bidsname), index = added dcm2niix index (e.g. _c1 -> index=1)
                basesuffix      = basepath.rsplit('_',1)[1]                                                     # The BIDS suffix, e.g. basepath = *_magnitude1 -> basesuffix=magnitude1
                index           = index.split('_')[0].zfill(2)                                                  # Zero padd as specified in the BIDS-standard (assuming two digits is sufficient); strip following suffices (fieldmaps produce *_e2_ph files)

                # Phase data may be stored in the magnitude data source (e.g. Philips fieldmaps)
                if 'ph' in filename.name.rsplit(ext)[0].split('_'):
                    basepath = basepath.replace('_magnitude', '_phase')

                # This is a special hack: dcm2niix does not always add a _c/_e suffix for the first(?) coil/echo image -> add it when we encounter a **_e2/_c2 file
                # https://github.com/rordenlab/dcm2niix/issues/381
                if dcm2niisuffix in ('_c','_e') and int(index)==2 and basesuffix not in ['magnitude1', 'phase1']:    # For fieldmaps: *_magnitude1_e[index] -> *_magnitude[index] (This is handled below)
                    filename_ce = Path(basepath + ext)                                                          # The file without the _c1/_e1 suffix
                    if dcm2niisuffix=='_e' and bids.get_bidsvalue(basepath, 'echo'):
                        newbasepath_ce = Path(bids.get_bidsvalue(basepath, 'echo', '1'))
                    else:
                        newbasepath_ce = Path(bids.get_bidsvalue(basepath, 'dummy', dcm2niisuffix.upper()[1:] + '1'.zfill(len(index))))  # --> append to acq-label, may need to be elaborated for future BIDS standards, supporting multi-coil data
                    newfilename_ce = newbasepath_ce.with_suffix(ext)                                            # The file as it should have been
                    if filename_ce.is_file():
                        if filename_ce != newfilename_ce:
                            LOGGER.warning(f"Found no dcm2niix {dcm2niisuffix} suffix for image instance 1, renaming\n{filename_ce} ->\n{newfilename_ce}\nConsider upgrading dcm2niix: https://github.com/rordenlab/dcm2niix/issues/381")
                            if newfilename_ce.is_file():
                                LOGGER.warning(f"Overwriting existing {newfilename_ce} file -- check your results carefully!")
                            filename_ce.replace(newfilename_ce)
                        if ext == '.json':
                            jsonfiles.append(newbasepath_ce.with_suffix('.json'))

                # Patch the basepath with the dcm2niix suffix info (we can't rely on the basepath info here because Siemens can e.g. put multiple echos in one series / run-folder)
                if dcm2niisuffix=='_e' and bids.get_bidsvalue(basepath, 'echo') and index:
                    basepath = bids.get_bidsvalue(basepath, 'echo', str(int(index)))                            # In contrast to other labels, run and echo labels MUST be integers. Those labels MAY include zero padding, but this is NOT RECOMMENDED to maintain their uniqueness

                elif dcm2niisuffix=='_e' and basesuffix in ('magnitude1','magnitude2','phase1','phase2') and index:  # i.e. modality == 'fmap'
                    basepath = basepath[0:-1] + str(int(index))                                                 # basepath: *_magnitude1_e[index] -> *_magnitude[index] and *_phase1_e[index]_ph -> *_phase[index]

                elif dcm2niisuffix=='_e' and basesuffix=='phasediff' and index:                                 # i.e. modality == 'fmap'
                    pass

                else:
                    basepath = bids.get_bidsvalue(basepath, 'dummy', dcm2niisuffix.upper()[1:] + index)         # --> append to acq-label, may need to be elaborated for future BIDS standards, supporting multi-coil data

                # Save the file with a new name
                newbidsname = str(Path(basepath).name)
                if runindex.startswith('<<') and runindex.endswith('>>'):
                    newbidsname = bids.increment_runindex(bidsmodality, newbidsname, ext)                       # Update the runindex now that the acq-label has changed
                newfilename = (bidsmodality/newbidsname).with_suffix(ext)
                LOGGER.info(f"Found dcm2niix {dcm2niisuffix} suffix, renaming\n{filename} ->\n{newfilename}")
                if newfilename.is_file():
                    LOGGER.warning(f"Overwriting existing {newfilename} file -- check your results carefully!")
                filename.replace(newfilename)
                if ext == '.json':
                    jsonfiles.append((bidsmodality/newbidsname).with_suffix('.json'))

        # Loop over and adapt all the newly produced json files and write to the scans.tsv file (every nifti-file comes with a json-file)
        if not jsonfiles:
            jsonfiles = [(bidsmodality/bidsname).with_suffix('.json')]
        for jsonfile in set(jsonfiles):

            # Check if dcm2niix behaved as expected
            if not jsonfile.is_file():
                LOGGER.error(f"Unexpected file conversion result: {jsonfile} not found")
                continue

            # Add a dummy b0 bval- and bvec-file for any file without a bval/bvec file (e.g. sbref, b0 scans)
            if modality == 'dwi':
                bvecfile = jsonfile.with_suffix('.bvec')
                bvalfile = jsonfile.with_suffix('.bval')
                if not bvecfile.is_file():
                    LOGGER.info(f"Adding dummy bvec file: {bvecfile}")
                    with bvecfile.open('w') as bvec_fid:
                        bvec_fid.write('0\n0\n0\n')
                if not bvalfile.is_file():
                    LOGGER.info(f"Adding dummy bval file: {bvalfile}")
                    with bvalfile.open('w') as bval_fid:
                        bval_fid.write('0\n')

            # Add the TaskName to the func json-file
            elif modality == 'func':
                with jsonfile.open('r') as json_fid:
                    data = json.load(json_fid)
                if not 'TaskName' in data:
                    LOGGER.info(f"Adding TaskName to: {jsonfile}")
                    data['TaskName'] = run['bids']['task']
                    with jsonfile.open('w') as json_fid:
                        json.dump(data, json_fid, indent=4)

            # Parse the acquisition time from the json file or else from the source header (NB: assuming the source file represents the first acquisition)
            if bidsmodality.name not in bidsmap['Options']['bidscoin']['bidsignore']:
                with jsonfile.open('r') as json_fid:
                    data = json.load(json_fid)
                if 'AcquisitionTime' not in data or not data['AcquisitionTime']:
                    data['AcquisitionTime'] = bids.get_sourcefield('AcquisitionTime', sourcefile)       # DICOM
                if not data['AcquisitionTime']:
                    data['AcquisitionTime'] = bids.get_sourcefield('exam_date', sourcefile)             # PAR/XML
                try:
                    acq_time = dateutil.parser.parse(data['AcquisitionTime'])
                except:
                    LOGGER.warning(f"Could not parse the acquisition time from: '{data['AcquisitionTime']}' in {sourcefile}")
                    acq_time = dateutil.parser.parse('00:00:00')
                scanpath = list(jsonfile.parent.glob(jsonfile.stem + '.nii*'))[0].relative_to(bidsses)  # Find the corresponding nifti file (there should be only one, let's not make assumptions about the .gz extension)
                scans_table.loc[scanpath.as_posix(), 'acq_time'] = '1925-01-01T' + acq_time.strftime('%H:%M:%S')

    # Write the scans_table to disk
    LOGGER.info(f"Writing acquisition time data to: {scans_tsv}")
    scans_table.sort_values(by=['acq_time','filename'], inplace=True)
    scans_table.to_csv(scans_tsv, sep='\t', encoding='utf-8')

    # Add IntendedFor and TE1+TE2 meta-data the fieldmap json-files. This has been postponed untill all modalities have been processed (i.e. so that all target images are indeed on disk)
    if bidsmap[dataformat]['fmap'] is not None:
        for fieldmap in bidsmap[dataformat]['fmap']:
            bidsname    = bids.get_bidsname(subid, sesid, 'fmap', fieldmap)
            niifiles    = []
            intendedfor = fieldmap['bids']['IntendedFor']

            # Search for the imaging files that match the IntendedFor search criteria
            if intendedfor:
                if intendedfor.startswith('<<') and intendedfor.endswith('>>'):
                    intendedfor = intendedfor[2:-2].split('><')
                elif not isinstance(intendedfor, list):
                    intendedfor = [intendedfor]
                for selector in intendedfor:
                    niifiles.extend([Path(niifile).relative_to(bidsfolder/subid)
                                     for niifile in sorted(bidsses.rglob(f"*{selector}*.nii*")) if selector])                                   # Search in all runs using a relative path to the subject folder
            else:
                intendedfor = []

            # Get the set of json-files (account for multiple runs in one data source and dcm2niix suffixes inserted into the acquisition label)
            jsonfiles = []
            acqlabel  = bids.get_bidsvalue(bidsname, 'acq')
            patterns  = (bidsname.replace('_run-1_',     '_run-[0-9]*_').
                                  replace('_magnitude1', '_magnitude*').
                                  replace('_magnitude2', '_magnitude*').
                                  replace('_phase1',     '_phase*').
                                  replace('_phase2',     '_phase*'),
                         bidsname.replace('_run-1_',     '_run-[0-9]*_').
                                  replace('_magnitude1', '_phase*').
                                  replace('_magnitude2', '_phase*'))
            for pattern in patterns:
                jsonfiles.extend((bidsses/'fmap').glob(pattern  + '.json'))
                if acqlabel:
                    cepattern = bids.get_bidsvalue(pattern, 'acq', acqlabel + '[CE][0-9]*')
                    jsonfiles.extend(list((bidsses/'fmap').glob(cepattern + '.json')))

            # Save the meta-data in the jsonfiles
            for jsonfile in set(jsonfiles):

                # Add the IntendedFor data
                with jsonfile.open('r') as json_fid:
                    data = json.load(json_fid)
                if 'IntendedFor' not in data:
                    if niifiles:
                        LOGGER.info(f"Adding IntendedFor to: {jsonfile}")
                    elif intendedfor:
                        LOGGER.warning(f"Empty 'IntendedFor' fieldmap value in {jsonfile}: the search for {intendedfor} gave no results")
                    else:
                        LOGGER.warning(f"Empty 'IntendedFor' fieldmap value in {jsonfile}: the IntendedFor value of the bidsmap entry was empty")
                    data['IntendedFor'] = [niifile.as_posix() for niifile in niifiles]                                                              # The path needs to use forward slashes instead of backward slashes
                    with jsonfile.open('w') as json_fid:
                        json.dump(data, json_fid, indent=4)

                # Extract the echo times from magnitude1 and magnitude2 and add them to the phasediff json-file
                if jsonfile.name.endswith('phasediff.json'):
                    json_magnitude = [None, None]
                    TE             = [None, None]
                    for n in (0,1):
                        json_magnitude[n] = jsonfile.parent/jsonfile.name.replace('_phasediff', f"_magnitude{n+1}")
                        if not json_magnitude[n].is_file():
                            LOGGER.error(f"Could not find expected magnitude{n+1} image associated with: {jsonfile}")
                        else:
                            with json_magnitude[n].open('r') as json_fid:
                                data = json.load(json_fid)
                            TE[n] = data['EchoTime']
                    if None in TE:
                        LOGGER.error(f"Cannot find and add valid EchoTime1={TE[0]} and EchoTime2={TE[1]} data to: {jsonfile}")
                    elif TE[0] > TE[1]:
                        LOGGER.error(f"Found invalid EchoTime1={TE[0]} > EchoTime2={TE[1]} for: {jsonfile}")
                    else:
                        with jsonfile.open('r') as json_fid:
                            data = json.load(json_fid)
                        data['EchoTime1'] = TE[0]
                        data['EchoTime2'] = TE[1]
                        LOGGER.info(f"Adding EchoTime1: {TE[0]} and EchoTime2: {TE[1]} to {jsonfile}")
                        with jsonfile.open('w') as json_fid:
                            json.dump(data, json_fid, indent=4)

    # Collect personal data from a source header (PAR/XML does not contain personal info)
    if dataformat=='DICOM' and sourcefile.name:
        personals['participant_id'] = subid
        if sesid:
            if 'session_id' not in personals:
                personals['session_id'] = sesid
            else:
                return                                              # Only from the first session -> BIDS specification
        age = bids.get_dicomfield('PatientAge', sourcefile)         # A string of characters with one of the following formats: nnnD, nnnW, nnnM, nnnY
        if age.endswith('D'):
            personals['age'] = str(int(float(age.rstrip('D'))/365.2524))
        elif age.endswith('W'):
            personals['age'] = str(int(float(age.rstrip('W'))/52.1775))
        elif age.endswith('M'):
            personals['age'] = str(int(float(age.rstrip('M'))/12))
        elif age.endswith('Y'):
            personals['age'] = str(int(float(age.rstrip('Y'))))
        elif age:
            personals['age'] = age
        personals['sex']     = bids.get_dicomfield('PatientSex',    sourcefile)
        personals['size']    = bids.get_dicomfield('PatientSize',   sourcefile)
        personals['weight']  = bids.get_dicomfield('PatientWeight', sourcefile)
Ejemplo n.º 5
0
def bidsmapper(rawfolder: str,
               bidsfolder: str,
               bidsmapfile: str,
               templatefile: str,
               subprefix: str = 'sub-',
               sesprefix: str = 'ses-',
               store: bool = False,
               interactive: bool = True) -> None:
    """
    Main function that processes all the subjects and session in the sourcefolder
    and that generates a maximally filled-in bidsmap.yaml file in bidsfolder/code/bidscoin.
    Folders in sourcefolder are assumed to contain a single dataset.

    :param rawfolder:       The root folder-name of the sub/ses/data/file tree containing the source data files
    :param bidsfolder:      The name of the BIDS root folder
    :param bidsmapfile:     The name of the bidsmap YAML-file
    :param templatefile:    The name of the bidsmap template YAML-file
    :param subprefix:       The prefix common for all source subject-folders
    :param sesprefix:       The prefix common for all source session-folders
    :param store:           If True, the provenance samples will be stored
    :param interactive:     If True, the user will be asked for help if an unknown run is encountered
    :return:bidsmapfile:    The name of the mapped bidsmap YAML-file
    """

    # Input checking
    rawfolder = Path(rawfolder).resolve()
    bidsfolder = Path(bidsfolder).resolve()
    bidsmapfile = Path(bidsmapfile)
    templatefile = Path(templatefile)
    bidscoinfolder = bidsfolder / 'code' / 'bidscoin'

    # Start logging
    bids.setup_logging(bidscoinfolder / 'bidsmapper.log')
    LOGGER.info('')
    LOGGER.info('-------------- START BIDSmapper ------------')
    LOGGER.info(
        f">>> bidsmapper sourcefolder={rawfolder} bidsfolder={bidsfolder} bidsmap={bidsmapfile} "
        f" template={templatefile} subprefix={subprefix} sesprefix={sesprefix} store={store} interactive={interactive}"
    )

    # Get the heuristics for filling the new bidsmap
    bidsmap_old, _ = bids.load_bidsmap(bidsmapfile, bidscoinfolder)
    template, _ = bids.load_bidsmap(templatefile, bidscoinfolder)

    # Create the new bidsmap as a copy / bidsmap skeleton with no modality entries (i.e. bidsmap with empty lists)
    if bidsmap_old:
        bidsmap_new = copy.deepcopy(bidsmap_old)
    else:
        bidsmap_new = copy.deepcopy(template)
    for logic in ('DICOM', 'PAR', 'P7', 'Nifti', 'FileSystem'):
        for modality in bids.bidsmodalities + (bids.unknownmodality,
                                               bids.ignoremodality):
            if bidsmap_new[logic] and modality in bidsmap_new[logic]:
                bidsmap_new[logic][modality] = None

    # Start with an empty skeleton if we didn't have an old bidsmap
    if not bidsmap_old:
        bidsmap_old = copy.deepcopy(bidsmap_new)

    # Start the Qt-application
    gui = interactive
    if gui:
        app = QApplication(sys.argv)
        app.setApplicationName('BIDS editor')
        mainwin = bidseditor.MainWindow()
        gui = bidseditor.Ui_MainWindow()
        gui.interactive = interactive
        gui.subprefix = subprefix
        gui.sesprefix = sesprefix

        if gui.interactive == 2:
            QMessageBox.information(
                mainwin, 'BIDS mapping workflow',
                f"The bidsmapper will now scan {bidsfolder} and whenever "
                f"it detects a new type of scan it will ask you to identify it.\n\n"
                f"It is important that you choose the correct BIDS modality "
                f"(e.g. 'anat', 'dwi' or 'func') and suffix (e.g. 'bold' or 'sbref').\n\n"
                f"At the end you will be shown an overview of all the "
                f"different scan types and BIDScoin options (as in the "
                f"bidseditor) that you can then (re)edit to your needs")

    # Loop over all subjects and sessions and built up the bidsmap entries
    dataformat = ''
    subjects = bids.lsdirs(rawfolder, subprefix + '*')
    if not subjects:
        LOGGER.warning(f'No subjects found in: {rawfolder/subprefix}*')
        gui = None
    for n, subject in enumerate(subjects, 1):

        sessions = bids.lsdirs(subject, sesprefix + '*')
        if not sessions:
            sessions = [subject]
        for session in sessions:

            # Unpack the data in a temporary folder if it is tarballed/zipped and/or contains a DICOMDIR file
            session, unpacked = bids.unpack(session, subprefix, sesprefix, '*')
            if unpacked:
                store = dict(source=unpacked,
                             target=bidscoinfolder / 'provenance')
            elif store:
                store = dict(source=rawfolder,
                             target=bidscoinfolder / 'provenance')
            else:
                store = dict()

            # Loop of the different DICOM runs (series) and collect source files
            sourcefiles = []
            dataformat = bids.get_dataformat(session)
            if not dataformat:
                LOGGER.info(
                    f"Skipping: {session} (subject {n}/{len(subjects)})")
                continue

            LOGGER.info(f"Parsing: {session} (subject {n}/{len(subjects)})")

            if dataformat == 'DICOM':
                for sourcedir in bids.lsdirs(session):
                    sourcefile = bids.get_dicomfile(sourcedir)
                    if sourcefile.name:
                        sourcefiles.append(sourcefile)

            if dataformat == 'PAR':
                sourcefiles = bids.get_parfiles(session)

            if dataformat == 'P7':
                sourcefiles = bids.get_p7file(session)

            # Update the bidsmap with the info from the source files
            for sourcefile in sourcefiles:
                bidsmap_new = build_bidsmap(dataformat, sourcefile,
                                            bidsmap_new, bidsmap_old, template,
                                            store, gui)

            # Update / append the nifti mapping
            if dataformat == 'Nifti':
                bidsmap_new = build_niftimap(session, bidsmap_new, bidsmap_old)

            # Update / append the file-system mapping
            if dataformat == 'FileSystem':
                bidsmap_new = build_filesystemmap(session, bidsmap_new,
                                                  bidsmap_old)

            # Update / append the plugin mapping
            if bidsmap_old['PlugIns']:
                bidsmap_new = build_pluginmap(session, bidsmap_new,
                                              bidsmap_old)

            # Clean-up the temporary unpacked data
            if unpacked:
                shutil.rmtree(session)

    if not dataformat:
        LOGGER.warning('Could not determine the dataformat of the source data')

    # (Re)launch the bidseditor UI_MainWindow
    bidsmapfile = bidscoinfolder / 'bidsmap.yaml'
    if gui:
        if not dataformat:
            QMessageBox.information(
                mainwin, 'BIDS mapping workflow',
                'Could not determine the dataformat of the source data.\n'
                'You can try running the bidseditor tool yourself')
        else:
            QMessageBox.information(
                mainwin, 'BIDS mapping workflow',
                f"The bidsmapper has finished scanning {rawfolder}\n\n"
                f"Please carefully check all the different BIDS output names "
                f"and BIDScoin options and (re)edit them to your needs.\n\n"
                f"You can always redo this step later by re-running the "
                f"bidsmapper or by just running the bidseditor tool")

            LOGGER.info('Opening the bidseditor')
            gui.setupUi(mainwin,
                        bidsfolder,
                        bidsmapfile,
                        bidsmap_new,
                        copy.deepcopy(bidsmap_new),
                        template,
                        dataformat,
                        subprefix=subprefix,
                        sesprefix=sesprefix)
            mainwin.show()
            app.exec()
    else:
        # Save the bidsmap in the bidscoinfolder
        bids.save_bidsmap(bidsmapfile, bidsmap_new)

    LOGGER.info('-------------- FINISHED! -------------------')
    LOGGER.info('')

    bids.reporterrors()
Ejemplo n.º 6
0
def scanparticipant(dataformat: str, session: Path, personals: dict,
                    subid: str, sesid: str) -> bool:
    """
    Converts the session source-files into BIDS-valid nifti-files in the corresponding bidsfolder and
    extracts personals (e.g. Age, Sex) from the source header

    :param session:     The full-path name of the subject/session source file/folder
    :param personals:   The dictionary with the personal information
    :param subid:       The subject-id from the bids-folder
    :param sesid:       The session-id from the bids-folder
    :return:            True if successful
    """

    # Get valid BIDS subject/session identifiers from the (first) DICOM- or PAR/XML source file
    sourcefile = Path()
    if dataformat == 'DICOM':
        sources = bids.lsdirs(session)
        for source in sources:
            sourcefile = bids.get_dicomfile(source)
            if sourcefile.name:
                break

    elif dataformat == 'PAR':
        sources = bids.get_parfiles(session)
        if sources:
            sourcefile = sources[0]

    else:
        LOGGER.error(
            f"Unsupported data format: {dataformat}\nPlease report this bug")
        return False

    if not sources:
        LOGGER.info(f"No data found for: {session}")
        return False

    # Collect personal data from a source header (PAR/XML does not contain personal info)
    if dataformat == 'DICOM' and sourcefile.name:
        personals['participant_id'] = subid
        if sesid:
            if 'session_id' not in personals:
                personals['session_id'] = sesid
            else:
                return False  # Only from the first session -> BIDS specification
        age = bids.get_dicomfield(
            'PatientAge', sourcefile
        )  # A string of characters with one of the following formats: nnnD, nnnW, nnnM, nnnY
        if age.endswith('D'):
            personals['age'] = str(int(float(age.rstrip('D')) / 365.2524))
        elif age.endswith('W'):
            personals['age'] = str(int(float(age.rstrip('W')) / 52.1775))
        elif age.endswith('M'):
            personals['age'] = str(int(float(age.rstrip('M')) / 12))
        elif age.endswith('Y'):
            personals['age'] = str(int(float(age.rstrip('Y'))))
        elif age:
            personals['age'] = age
        personals['sex'] = bids.get_dicomfield('PatientSex', sourcefile)
        personals['size'] = bids.get_dicomfield('PatientSize', sourcefile)
        personals['weight'] = bids.get_dicomfield('PatientWeight', sourcefile)

        return True