Example #1
def bidscoiner_plugin(session: Path, bidsmap: dict, bidsses: Path) -> None:
    """
    The bidscoiner plugin to convert the session DICOM and PAR/REC source-files into BIDS-valid nifti-files in the
    corresponding bids session-folder and extract personals (e.g. Age, Sex) from the source header

    :param session:     The full-path name of the subject/session source folder
    :param bidsmap:     The full mapping heuristics from the bidsmap YAML-file
    :param bidsses:     The full-path name of the BIDS output `sub-/ses-` folder
    :return:            Nothing
    """

    # Get the subject identifiers and the BIDS root folder from the bidsses folder
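    # Illustrative example (hypothetical paths): bidsses = /data/bids/sub-001/ses-01
    # -> bidsfolder = /data/bids, subid = 'sub-001', sesid = 'ses-01'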
    if bidsses.name.startswith('ses-'):
        bidsfolder = bidsses.parent.parent
        subid = bidsses.parent.name
        sesid = bidsses.name
    else:
        bidsfolder = bidsses.parent
        subid = bidsses.name
        sesid = ''

    # Get started and see what dataformat we have
    options = bidsmap['Options']['plugins']['dcm2niix2bids']
    datasource = bids.get_datasource(session, {'dcm2niix2bids': options})
    dataformat = datasource.dataformat
    if not dataformat:
        LOGGER.info(f"No {__name__} sourcedata found in: {session}")
        return

    # Make a list of all the data sources / runs
    manufacturer = 'UNKNOWN'
    sources = []
    if dataformat == 'DICOM':
        sources = bidscoin.lsdirs(session)
        manufacturer = datasource.attributes('Manufacturer')
    elif dataformat == 'PAR':
        sources = bids.get_parfiles(session)
        manufacturer = 'Philips Medical Systems'
    else:
        LOGGER.exception(f"Unsupported dataformat '{dataformat}'")

    # Read or create a scans_table and tsv-file
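    # e.g. (hypothetical) subid='sub-001', sesid='ses-01' -> 'sub-001_ses-01_scans.tsv'; without a session: 'sub-001_scans.tsv'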
    scans_tsv = bidsses / f"{subid}{bids.add_prefix('_',sesid)}_scans.tsv"
    if scans_tsv.is_file():
        scans_table = pd.read_csv(scans_tsv, sep='\t', index_col='filename')
    else:
        scans_table = pd.DataFrame(columns=['acq_time'], dtype='str')
        scans_table.index.name = 'filename'

    # Process all the source files or run subfolders
    sourcefile = Path()
    for source in sources:

        # Get a sourcefile
        if dataformat == 'DICOM':
            sourcefile = bids.get_dicomfile(source)
        elif dataformat == 'PAR':
            sourcefile = source
        if not sourcefile.name:
            continue

        # Get a matching run from the bidsmap
        datasource = bids.DataSource(sourcefile, {'dcm2niix2bids': options},
                                     dataformat)
        run, match = bids.get_matching_run(datasource, bidsmap, runtime=True)

        # Check if we should ignore this run
        if datasource.datatype in bidsmap['Options']['bidscoin']['ignoretypes']:
            LOGGER.info(f"Leaving out: {source}")
            continue

        # Check if we already know this run
        if not match:
            LOGGER.error(
                f"Skipping unknown '{datasource.datatype}' run: {sourcefile}\n-> Re-run the bidsmapper and delete {bidsses} to solve this warning"
            )
            continue

        LOGGER.info(f"Processing: {source}")

        # Create the BIDS session/datatype output folder
        if run['bids']['suffix'] in bids.get_derivatives(datasource.datatype):
            outfolder = bidsfolder / 'derivatives' / manufacturer.replace(' ', '') / subid / sesid / datasource.datatype
        else:
            outfolder = bidsses / datasource.datatype
        outfolder.mkdir(parents=True, exist_ok=True)

        # Compose the BIDS filename using the matched run
        bidsname = bids.get_bidsname(subid, sesid, run, runtime=True)
        runindex = run['bids'].get('run', '')
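        # NB: A run-index wrapped in '<<' '>>' (e.g. '<<1>>') is dynamic and resolved at runtime; e.g. (hypothetical) if a
        # 'sub-001_run-1_T1w' output already exists in outfolder, increment_runindex() bumps the new name to 'run-2'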
        if runindex.startswith('<<') and runindex.endswith('>>'):
            bidsname = bids.increment_runindex(outfolder, bidsname)
        jsonfiles = [(outfolder / bidsname).with_suffix('.json')]  # List -> Collect the associated json-files (for updating them later) -- possibly > 1

        # Check if file already exists (-> e.g. when a static runindex is used)
        if (outfolder / bidsname).with_suffix('.json').is_file():
            LOGGER.warning(
                f"{outfolder/bidsname}.* already exists and will be deleted -- check your results carefully!"
            )
            for ext in ('.nii.gz', '.nii', '.json', '.tsv', '.tsv.gz', '.bval',
                        '.bvec'):
                (outfolder / bidsname).with_suffix(ext).unlink(missing_ok=True)

        # Convert physiological log files (dcm2niix can't handle these)
        if run['bids']['suffix'] == 'physio':
            if bids.get_dicomfile(source, 2).name:  # TODO: issue warning or support PAR
                LOGGER.warning(f"Found > 1 DICOM file in {source}, using: {sourcefile}")
            physiodata = physio.readphysio(sourcefile)
            physio.physio2tsv(physiodata, outfolder / bidsname)

        # Convert the source-files in the run folder to nifti's in the BIDS-folder
        else:
            command = '{command} {args} -f "{filename}" -o "{outfolder}" "{source}"'.format(
                command=options['command'],
                args=options.get('args', ''),
                filename=bidsname,
                outfolder=outfolder,
                source=source)
            if not bidscoin.run_command(command):
                if not list(outfolder.glob(f"{bidsname}.nii*")): continue
            if list(outfolder.glob(f"{bidsname}a.nii*")):
                LOGGER.warning(
                    f"Unexpected variants of {outfolder/bidsname}* were produced by dcm2niix. Possibly this can be remedied by using the dcm2niix -i option (to ignore derived, localizer and 2D images)"
                )

            # Replace uncropped output image with the cropped one
            if '-x y' in options.get('args', ''):
                for dcm2niixfile in sorted(outfolder.glob(bidsname + '*_Crop_*')):  # e.g. *_Crop_1.nii.gz
                    ext = ''.join(dcm2niixfile.suffixes)
                    newbidsfile = str(dcm2niixfile).rsplit(ext, 1)[0].rsplit('_Crop_', 1)[0] + ext
                    LOGGER.info(
                        f"Found dcm2niix _Crop_ postfix, replacing original file\n{dcm2niixfile} ->\n{newbidsfile}"
                    )
                    dcm2niixfile.replace(newbidsfile)

            # Rename all files that got additional postfixes from dcm2niix. See: https://github.com/rordenlab/dcm2niix/blob/master/FILENAMING.md
            dcm2niixpostfixes = ('_c', '_i', '_Eq', '_real', '_imaginary',
                                 '_MoCo', '_t', '_Tilt', '_e', '_ph', '_ADC',
                                 '_fieldmaphz')
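            # e.g. (hypothetical) dcm2niix may write the second echo as 'sub-001_T1w_e2.nii.gz',
            # which the loop below renames to something like 'sub-001_echo-2_T1w.nii.gz'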
            dcm2niixfiles = sorted(set([dcm2niixfile for dcm2niixpostfix in dcm2niixpostfixes
                                        for dcm2niixfile in outfolder.glob(f"{bidsname}*{dcm2niixpostfix}*.nii*")]))
            if not jsonfiles[0].is_file() and dcm2niixfiles:  # Possibly renamed by dcm2niix, e.g. with multi-echo data (but not always for the first echo)
                jsonfiles.pop(0)
            for dcm2niixfile in dcm2niixfiles:
                ext = ''.join(dcm2niixfile.suffixes)
                postfixes = str(dcm2niixfile).split(bidsname)[1].rsplit(ext)[0].split('_')[1:]
                newbidsname = dcm2niixfile.name  # Strip the additional postfixes and assign them to bids entities in the for-loop below
                for postfix in postfixes:  # dcm2niix postfixes _c%d, _e%d and _ph (and any combination of these in that order) are for multi-coil data, multi-echo data and phase data

                    # Patch the echo entity in the newbidsname with the dcm2niix echo info                      # NB: We can't rely on the bids-entity info here because manufacturers can e.g. put multiple echos in one series / run-folder
                    if 'echo' in run['bids'] and postfix.startswith('e'):
                        echonr = f"_{postfix}".replace('_e', '')  # E.g. postfix='e1'
                        if not echonr:
                            echonr = '1'
                        if echonr.isnumeric():
                            newbidsname = bids.insert_bidskeyval(
                                newbidsname, 'echo', echonr.lstrip('0')
                            )  # In contrast to other labels, run and echo labels MUST be integers. Those labels MAY include zero padding, but this is NOT RECOMMENDED to maintain their uniqueness
                        else:
                            LOGGER.error(
                                f"Unexpected postix '{postfix}' found in {dcm2niixfile}"
                            )
                            newbidsname = bids.get_bidsvalue(
                                newbidsname, 'dummy', postfix
                            )  # Append the unknown postfix to the acq-label

                    # Patch the phase entity in the newbidsname with the dcm2niix mag/phase info
                    elif 'part' in run['bids'] and postfix in ('ph', 'real', 'imaginary'):  # e.g. part: ['', 'mag', 'phase', 'real', 'imag', 0]
                        if postfix == 'ph':
                            newbidsname = bids.insert_bidskeyval(newbidsname, 'part', 'phase')
                        if postfix == 'real':
                            newbidsname = bids.insert_bidskeyval(newbidsname, 'part', 'real')
                        if postfix == 'imaginary':
                            newbidsname = bids.insert_bidskeyval(newbidsname, 'part', 'imag')

                    # Patch fieldmap images (NB: datatype=='fmap' is too broad, see the fmap.yaml file)
                    elif run['bids']['suffix'] in bids.bidsdatatypes['fmap'][0]['suffixes']:  # i.e. in ('magnitude','magnitude1','magnitude2','phase1','phase2','phasediff','fieldmap'). TODO: Make this robust for future BIDS versions
                        if len(dcm2niixfiles) not in (1, 2, 3, 4):  # Phase / echo data may be stored in the same data source / run folder
                            LOGGER.debug(
                                f"Unknown fieldmap {outfolder/bidsname} for '{postfix}'"
                            )
                        newbidsname = newbidsname.replace('_magnitude1a',    '_magnitude2')     # First catch this potential weird / rare case
                        newbidsname = newbidsname.replace('_magnitude1_pha', '_phase2')         # First catch this potential weird / rare case
                        newbidsname = newbidsname.replace('_magnitude1_e1',  '_magnitude1')     # Case 2 = Two phase and magnitude images
                        newbidsname = newbidsname.replace('_magnitude1_e2',  '_magnitude2')     # Case 2: This can happen when the e2 image is stored in the same directory as the e1 image, but with the e2 listed first
                        newbidsname = newbidsname.replace('_magnitude2_e1',  '_magnitude1')     # Case 2: This can happen when the e2 image is stored in the same directory as the e1 image, but with the e2 listed first
                        newbidsname = newbidsname.replace('_magnitude2_e2',  '_magnitude2')     # Case 2
                        if len(dcm2niixfiles) in (2, 3):                                        # Case 1 = One or two magnitude + one phasediff image
                            newbidsname = newbidsname.replace('_magnitude1_ph', '_phasediff')
                            newbidsname = newbidsname.replace('_magnitude2_ph', '_phasediff')
                        newbidsname = newbidsname.replace('_phasediff_e1',   '_phasediff')      # Case 1
                        newbidsname = newbidsname.replace('_phasediff_e2',   '_phasediff')      # Case 1
                        newbidsname = newbidsname.replace('_phasediff_ph',   '_phasediff')      # Case 1
                        newbidsname = newbidsname.replace('_magnitude1_ph',  '_phase1')         # Case 2: One or two magnitude and phase images in one folder / datasource
                        newbidsname = newbidsname.replace('_magnitude2_ph',  '_phase2')         # Case 2: Two magnitude + two phase images in one folder / datasource
                        newbidsname = newbidsname.replace('_phase1_e1',      '_phase1')         # Case 2
                        newbidsname = newbidsname.replace('_phase1_e2',      '_phase2')         # Case 2: This can happen when the e2 image is stored in the same directory as the e1 image, but with the e2 listed first
                        newbidsname = newbidsname.replace('_phase2_e1',      '_phase1')         # Case 2: This can happen when the e2 image is stored in the same directory as the e1 image, but with the e2 listed first
                        newbidsname = newbidsname.replace('_phase2_e2',      '_phase2')         # Case 2
                        newbidsname = newbidsname.replace('_phase1_ph',      '_phase1')         # Case 2: One or two magnitude and phase images in one folder / datasource
                        newbidsname = newbidsname.replace('_phase2_ph',      '_phase2')         # Case 2: Two magnitude + two phase images in one folder / datasource
                        newbidsname = newbidsname.replace('_magnitude_e1',   '_magnitude')      # Case 3 = One magnitude + one fieldmap image
                        if len(dcm2niixfiles) == 2:
                            newbidsname = newbidsname.replace('_fieldmap_e1', '_magnitude')     # Case 3: One magnitude + one fieldmap image in one folder / datasource
                        newbidsname = newbidsname.replace('_fieldmap_e1',    '_fieldmap')       # Case 3
                        newbidsname = newbidsname.replace('_magnitude_ph',   '_fieldmap')       # Case 3: One magnitude + one fieldmap image in one folder / datasource
                        newbidsname = newbidsname.replace('_fieldmap_ph',    '_fieldmap')       # Case 3

                    # Append the dcm2niix info to acq-label, may need to be improved / elaborated for future BIDS standards, supporting multi-coil data
                    else:
                        newbidsname = bids.get_bidsvalue(
                            newbidsname, 'dummy', postfix)

                    # Remove the added postfix from the new bidsname
                    newbidsname = newbidsname.replace(f"_{postfix}_", '_')  # If it is not last
                    newbidsname = newbidsname.replace(f"_{postfix}.", '.')  # If it is last

                    # The ADC images are not BIDS compliant
                    if postfix == 'ADC':
                        LOGGER.warning(
                            f"The {newbidsname} image is most likely not BIDS-compliant -- you can probably delete it safely and update the scants.tsv file"
                        )

                # Save the nifti file with a new name
                if runindex.startswith('<<') and runindex.endswith('>>'):
                    newbidsname = bids.increment_runindex(outfolder, newbidsname, '')  # Update the runindex now that the acq-label has changed
                newbidsfile = outfolder / newbidsname
                LOGGER.info(
                    f"Found dcm2niix {postfixes} postfixes, renaming\n{dcm2niixfile} ->\n{newbidsfile}"
                )
                if newbidsfile.is_file():
                    LOGGER.warning(
                        f"Overwriting existing {newbidsfile} file -- check your results carefully!"
                    )
                dcm2niixfile.replace(newbidsfile)

                # Rename all associated files (i.e. the json-, bval- and bvec-files)
                oldjsonfile = dcm2niixfile.with_suffix('').with_suffix('.json')
                newjsonfile = newbidsfile.with_suffix('').with_suffix('.json')
                if not oldjsonfile.is_file():
                    LOGGER.warning(
                        f"Unexpected file conversion result: {oldjsonfile} not found"
                    )
                else:
                    if oldjsonfile in jsonfiles:
                        jsonfiles.remove(oldjsonfile)
                    if newjsonfile not in jsonfiles:
                        jsonfiles.append(newjsonfile)
                for oldfile in outfolder.glob(dcm2niixfile.with_suffix('').stem + '.*'):
                    oldfile.replace(newjsonfile.with_suffix(''.join(oldfile.suffixes)))

        # Copy over the source meta-data
        metadata = bids.copymetadata(sourcefile, outfolder / bidsname,
                                     options.get('meta', []))

        # Loop over and adapt all the newly produced json sidecar-files and write to the scans.tsv file (NB: assumes every nifti-file comes with a json-file)
        for jsonfile in sorted(set(jsonfiles)):

            # Load the json meta-data
            with jsonfile.open('r') as json_fid:
                jsondata = json.load(json_fid)

            # Add all the source meta data to the meta-data
            for metakey, metaval in metadata.items():
                if metakey in jsondata and jsondata[metakey] != metaval:
                    LOGGER.warning(f"Replacing {metakey} values in {jsonfile}: {jsondata[metakey]} -> {metaval}")
                jsondata[metakey] = metaval

            # Add all the run meta data to the meta-data. NB: the dynamic `IntendedFor` value is handled separately later
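            # e.g. (hypothetical) a meta value such as '<<SeriesDescription>>' is resolved against the source header here before it is written to the sidecar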
            for metakey, metaval in run['meta'].items():
                if metakey != 'IntendedFor':
                    metaval = datasource.dynamicvalue(metaval,
                                                      cleanup=False,
                                                      runtime=True)
                    try:
                        metaval = ast.literal_eval(str(metaval))
                    except (ValueError, SyntaxError):
                        pass
                    LOGGER.info(
                        f"Adding '{metakey}: {metaval}' to: {jsonfile}")
                if not metaval:
                    metaval = None
                jsondata[metakey] = metaval

            # Remove unused (but added from the template) B0FieldIdentifiers/Sources
            if not jsondata.get('B0FieldSource'):
                jsondata.pop('B0FieldSource', None)
            if not jsondata.get('B0FieldIdentifier'):
                jsondata.pop('B0FieldIdentifier', None)

            # Save the meta-data to the json sidecar-file
            with jsonfile.open('w') as json_fid:
                json.dump(jsondata, json_fid, indent=4)

            # Parse the acquisition time from the source header or else from the json file (NB: assuming the source file represents the first acquisition)
            outputfile = [
                file for file in jsonfile.parent.glob(jsonfile.stem + '.*')
                if file.suffix in ('.nii', '.gz')
            ]  # Find the corresponding nifti/tsv.gz file (there should be only one, let's not make assumptions about the .gz extension)
            if not outputfile:
                LOGGER.exception(
                    f"No data-file found with {jsonfile} when updating {scans_tsv}"
                )
            elif datasource.datatype not in bidsmap['Options']['bidscoin']['bidsignore'] and run['bids']['suffix'] not in bids.get_derivatives(datasource.datatype):
                acq_time = ''
                if dataformat == 'DICOM':
                    acq_time = f"{datasource.attributes('AcquisitionDate')}T{datasource.attributes('AcquisitionTime')}"
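                    # e.g. DICOM AcquisitionDate='20220131' + AcquisitionTime='135542.480000' -> '20220131T135542.480000' (illustrative values)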
                elif dataformat == 'PAR':
                    acq_time = datasource.attributes('exam_date')
                if not acq_time or acq_time == 'T':
                    acq_time = f"1925-01-01T{jsondata.get('AcquisitionTime','')}"
                try:
                    acq_time = dateutil.parser.parse(acq_time)
                    if options.get('anon', 'y') in ('y', 'yes'):
                        acq_time = acq_time.replace(year=1925, month=1, day=1)  # Privacy protection (see BIDS specification)
                    acq_time = acq_time.isoformat()
                except Exception as jsonerror:
                    LOGGER.warning(
                        f"Could not parse the acquisition time from: {sourcefile}\n{jsonerror}"
                    )
                    acq_time = 'n/a'
                scanpath = outputfile[0].relative_to(bidsses)
                scans_table.loc[scanpath.as_posix(), 'acq_time'] = acq_time

    # Write the scans_table to disk
    LOGGER.info(f"Writing acquisition time data to: {scans_tsv}")
    scans_table.sort_values(by=['acq_time', 'filename'], inplace=True)
    scans_table.replace('', 'n/a').to_csv(scans_tsv,
                                          sep='\t',
                                          encoding='utf-8',
                                          na_rep='n/a')

    # Collect personal data from a source header (PAR/XML does not contain personal info)
    personals = {}
    if sesid and 'session_id' not in personals:
        personals['session_id'] = sesid
    personals['age'] = ''
    if dataformat == 'DICOM':
        age = datasource.attributes('PatientAge')  # A string of characters with one of the following formats: nnnD, nnnW, nnnM, nnnY
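        # e.g. (hypothetical) '031Y' -> 31.0 years, '006M' -> 0.5 years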
        if age.endswith('D'): age = float(age.rstrip('D')) / 365.2524
        elif age.endswith('W'): age = float(age.rstrip('W')) / 52.1775
        elif age.endswith('M'): age = float(age.rstrip('M')) / 12
        elif age.endswith('Y'): age = float(age.rstrip('Y'))
        if age:
            if options.get('anon', 'y') in ('y', 'yes'):
                age = int(float(age))
            personals['age'] = str(age)
        personals['sex'] = datasource.attributes('PatientSex')
        personals['size'] = datasource.attributes('PatientSize')
        personals['weight'] = datasource.attributes('PatientWeight')

    # Store the collected personals in the participants_table
    participants_tsv = bidsfolder / 'participants.tsv'
    if participants_tsv.is_file():
        participants_table = pd.read_csv(participants_tsv, sep='\t', dtype=str)
        participants_table.set_index(['participant_id'],
                                     verify_integrity=True,
                                     inplace=True)
    else:
        participants_table = pd.DataFrame()
        participants_table.index.name = 'participant_id'
    if subid in participants_table.index and 'session_id' in participants_table.keys() and participants_table.loc[subid, 'session_id']:
        return  # Only take data from the first session -> BIDS specification
    for key in personals:  # TODO: Check that only values that are consistent over sessions go in the participants.tsv file, otherwise put them in a sessions.tsv file
        if key not in participants_table or participants_table[key].isnull().get(subid, True) or participants_table[key].get(subid) == 'n/a':
            participants_table.loc[subid, key] = personals[key]

    # Write the collected data to the participants tsv-file
    LOGGER.info(f"Writing {subid} subject data to: {participants_tsv}")
    participants_table.replace('', 'n/a').to_csv(participants_tsv,
                                                 sep='\t',
                                                 encoding='utf-8',
                                                 na_rep='n/a')
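# A minimal usage sketch (illustrative only; all paths are hypothetical and the bidsmap loading is elided --
# in practice the bidscoiner framework loads the study bidsmap and calls this plugin once per subject/session folder):
#
#   bidsmap = ...  # the study bidsmap dict, loaded from the bidsmap YAML-file
#   bidscoiner_plugin(Path('sourcedata/sub-001/ses-01'), bidsmap, Path('bids/sub-001/ses-01'))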
Example #2
def bidsmapper_plugin(session: Path, bidsmap_new: dict, bidsmap_old: dict,
                      template: dict, store: dict) -> None:
    """
    All the logic to map the DICOM/PAR source fields onto bids labels goes into this function

    :param session:     The full-path name of the subject/session raw data source folder
    :param bidsmap_new: The new study bidsmap that we are building
    :param bidsmap_old: The previous study bidsmap that has precedence over the template bidsmap
    :param template:    The template bidsmap with the default heuristics
    :param store:       The paths of the source- and target-folder
    :return:
    """

    # Get started
    plugin = {
        'dcm2niix2bids': bidsmap_new['Options']['plugins']['dcm2niix2bids']
    }
    datasource = bids.get_datasource(session, plugin)
    dataformat = datasource.dataformat
    if not dataformat:
        return

    # Collect the different DICOM/PAR source files for all runs in the session
    sourcefiles = []
    if dataformat == 'DICOM':
        for sourcedir in bidscoin.lsdirs(session):
            for n in range(
                    1
            ):  # Option: Use range(2) to scan two files and catch e.g. magnitude1/2 fieldmap files that are stored in one Series folder (but bidscoiner sees only the first file anyhow and it makes bidsmapper 2x slower :-()
                sourcefile = bids.get_dicomfile(sourcedir, n)
                if sourcefile.name:
                    sourcefiles.append(sourcefile)
    elif dataformat == 'PAR':
        sourcefiles = bids.get_parfiles(session)
    else:
        LOGGER.exception(f"Unsupported dataformat '{dataformat}'")

    # Update the bidsmap with the info from the source files
    for sourcefile in sourcefiles:

        # Input checks
        if not sourcefile.name or (not template[dataformat]
                                   and not bidsmap_old[dataformat]):
            LOGGER.error(
                f"No {dataformat} source information found in the bidsmap and template for: {sourcefile}"
            )
            return

        # See if we can find a matching run in the old bidsmap
        datasource = bids.DataSource(sourcefile, plugin, dataformat)
        run, match = bids.get_matching_run(datasource, bidsmap_old)

        # If not, see if we can find a matching run in the template
        if not match:
            run, _ = bids.get_matching_run(datasource, template)

        # See if we have collected the run somewhere in our new bidsmap
        if not bids.exist_run(bidsmap_new, '', run):

            # Communicate with the user if the run was not present in bidsmap_old or in template, i.e. that we found a new sample
            if not match:
                LOGGER.info(
                    f"Discovered '{datasource.datatype}' {dataformat} sample: {sourcefile}"
                )

            # Now work from the provenance store
            if store:
                targetfile = store['target'] / sourcefile.relative_to(
                    store['source'])
                targetfile.parent.mkdir(parents=True, exist_ok=True)
                run['provenance'] = str(shutil.copy2(sourcefile, targetfile))

            # Copy the filled-in run over to the new bidsmap
            bids.append_run(bidsmap_new, run)

        else:
            # Communicate with the user if the run was already present in bidsmap_old or in template
            LOGGER.debug(
                f"Known '{datasource.datatype}' {dataformat} sample: {sourcefile}"
            )
def echocombine(bidsdir: str, pattern: str, subjects: list, output: str, algorithm: str, weights: list, force: bool=False):
    """

    :param bidsdir:     The bids-directory with the (multi-echo) subject data
    :param pattern:     Globlike recursive search pattern (relative to the subject/session folder) to select the first echo of the images that need to be combined, e.g. '*task-*echo-1*'
    :param subjects:    List of sub-# identifiers to be processed (the sub- prefix can be left out). If not specified then all sub-folders in the bidsfolder will be processed
    :param output:      Determines where the output is saved. It can be the name of a BIDS datatype folder, such as 'func', or of the derivatives folder, i.e. 'derivatives'. If output = [the name of the input datatype folder] then the original echo images are replaced by one combined image. If output is left empty then the combined image is saved in the input datatype folder and the original echo images are moved to the {unknowndatatype} folder
    :param algorithm:   Combination algorithm, either 'PAID', 'TE' or 'average'
    :param weights:     Weights for each echo
    :param force:       Boolean to overwrite existing ME target files
    :return:
    """

    # Input checking
    bidsdir = Path(bidsdir).resolve()

    # Start logging
    bidscoin.setup_logging(bidsdir/'code'/'bidscoin'/'echocombine.log')
    LOGGER.info('')
    LOGGER.info(f"--------- START echocombine ---------")
    LOGGER.info(f">>> echocombine bidsfolder={bidsdir} pattern={pattern} subjects={subjects} output={output}"
                f" algorithm={algorithm} weights={weights}")

    # Get the list of subjects
    if not subjects:
        subjects = bidscoin.lsdirs(bidsdir, 'sub-*')
        if not subjects:
            LOGGER.warning(f"No subjects found in: {bidsdir/'sub-*'}")
    else:
        subjects = ['sub-' + subject.replace('sub-', '') for subject in subjects]              # Make sure there is a "sub-" prefix
        subjects = [bidsdir/subject for subject in subjects if (bidsdir/subject).is_dir()]

    # Loop over bids subject/session-directories
    with logging_redirect_tqdm():
        for n, subject in enumerate(tqdm(subjects, unit='subject', leave=False), 1):

            sessions = bidscoin.lsdirs(subject, 'ses-*')
            if not sessions:
                sessions = [subject]
            for session in sessions:

                LOGGER.info('-------------------------------------')
                LOGGER.info(f"Combining echos for ({n}/{len(subjects)}): {session}")

                subid, sesid = bids.DataSource(session/'dum.my', subprefix='sub-', sesprefix='ses-').subid_sesid()

                # Search for multi-echo matches
                for match in sorted([match for match in session.rglob(pattern) if '.nii' in match.suffixes]):

                    # Check if it is normal/BIDS multi-echo data or that the echo-number is appended to the acquisition label (as done in BIDScoin)
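                    # e.g. (hypothetical) BIDS-style name: 'sub-01_task-rest_echo-2_bold.nii.gz' vs
                    # BIDScoin-style name: 'sub-01_acq-MBMEe2_bold.nii.gz' (echo number appended to the acq-label)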
                    if '_echo-' in match.name:
                        echonr      = bids.get_bidsvalue(match, 'echo')
                        mepattern   = bids.get_bidsvalue(match, 'echo', '*')                        # The pattern that selects all echos
                        cename      = match.name.replace(f"_echo-{echonr}", '')                     # The combined-echo output filename
                    elif '_acq-' in match.name and bids.get_bidsvalue(match, 'acq').split('e')[-1].isnumeric():
                        acq, echonr = bids.get_bidsvalue(match, 'acq').rsplit('e',1)
                        mepattern   = bids.get_bidsvalue(match, 'acq', acq + 'e*')                  # The pattern that selects all echos
                        cename      = match.name.replace(f"_acq-{acq}e{echonr}", f"_acq-{acq}")     # The combined-echo output filename
                        LOGGER.info(f"No 'echo' key-value pair found in the filename, using the 'acq-{acq}e{echonr}' pair instead (BIDScoin-style)")
                    else:
                        LOGGER.warning(f"No 'echo' encoding found in the filename, skipping: {match}")
                        continue
                    echos     = sorted(match.parent.glob(mepattern.name))
                    newechos  = [echo.parents[1]/unknowndatatype/echo.name for echo in echos]
                    if len(echos) == 1:
                        LOGGER.warning(f"Only one echo image found, nothing to do for: {match}")
                        continue

                    # Construct the combined-echo output filename and check if that file already exists
                    datatype = match.parent.name
                    if not output:
                        cefile = session/datatype/cename
                    elif output == 'derivatives':
                        cefile = bidsdir/'derivatives'/'multiecho'/subid/sesid/datatype/cename
                    else:
                        cefile = session/output/cename
                    cefile.parent.mkdir(parents=True, exist_ok=True)
                    if cefile.is_file() and not force:
                        LOGGER.warning(f"Outputfile {cefile} already exists, skipping: {match}")
                        continue

                    # Combine the multi-echo images
                    me.me_combine(mepattern, cefile, algorithm, weights, saveweights=False)

                    # (Re)move the original multi-echo images
                    if not output:
                        for echo, newecho in zip(echos, newechos):
                            LOGGER.info(f"Moving original echo image: {echo} -> {newecho}")
                            newecho.parent.mkdir(parents=True, exist_ok=True)
                            echo.replace(newecho)
                            echo.with_suffix('').with_suffix('.json').replace(newecho.with_suffix('').with_suffix('.json'))
                    elif output == datatype:
                        for echo in echos:
                            LOGGER.info(f"Removing original echo image: {echo}")
                            echo.unlink()
                            echo.with_suffix('').with_suffix('.json').unlink()

                    # Construct the path names relative to the session folder (as in the scans.tsv file)
                    oldechos_rel = [echo.relative_to(session).as_posix() for echo in echos]
                    newechos_rel = [echo.relative_to(session).as_posix() for echo in echos + newechos if echo.is_file()]
                    if output == 'derivatives':
                        cefile_rel = ''                 # A remote folder cannot be specified as IntendedFor :-(
                    else:
                        cefile_rel = cefile.relative_to(session).as_posix()

                    # Update the IntendedFor fields of the fieldmaps (i.e. remove the old echos, add the echo-combined image and, optionally, the new echos)
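                    # e.g. (hypothetical) 'func/sub-01_echo-1_bold.nii.gz' is removed from IntendedFor and the combined 'func/sub-01_bold.nii.gz' is added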
                    if output != 'derivatives' and (session/'fmap').is_dir():
                        for fmap in (session/'fmap').glob('*.json'):
                            with fmap.open('r') as fmap_fid:
                                metadata = json.load(fmap_fid)
                            intendedfor = metadata.get('IntendedFor', [])
                            if isinstance(intendedfor, str):
                                intendedfor = [intendedfor]
                            if sesid:                   # NB: IntendedFor is relative to the subject folder
                                intendedfor = [file.split(sesid+'/',1)[1] for file in intendedfor]
                            if oldechos_rel[0] in intendedfor:
                                LOGGER.info(f"Updating 'IntendedFor' in {fmap}")
                                relfiles                = [file for file in intendedfor if file not in oldechos_rel] + newechos_rel + [cefile_rel]
                                metadata['IntendedFor'] = [(Path(sesid)/relfile).as_posix() for relfile in relfiles]
                                with fmap.open('w') as fmap_fid:
                                    json.dump(metadata, fmap_fid, indent=4)

                    # Update the scans.tsv file
                    if (bidsdir/'.bidsignore').is_file():
                        bidsignore = (bidsdir/'.bidsignore').read_text().splitlines()
                    else:
                        bidsignore = [unknowndatatype + '/']
                    scans_tsv = session/f"{subid}{bids.add_prefix('_', sesid)}_scans.tsv"
                    if scans_tsv.is_file():

                        scans_table = pd.read_csv(scans_tsv, sep='\t', index_col='filename')
                        if oldechos_rel[0] in scans_table.index:
                            scans_table.loc['oldrow'] = scans_table.loc[oldechos_rel[0]]
                        elif 'acq_time' in scans_table:
                            with cefile.with_suffix('').with_suffix('.json').open('r') as fid:
                                metadata = json.load(fid)
                            date = scans_table.iloc[0]['acq_time'].split('T')[0]
                            scans_table.loc['oldrow', 'acq_time'] = f"{date}T{metadata.get('AcquisitionTime')}"
                        else:
                            scans_table.loc['oldrow'] = None

                        if output+'/' not in bidsignore + ['derivatives/'] and cefile.parent.name in bids.bidsdatatypes:
                            LOGGER.info(f"Adding '{cefile_rel}' to '{scans_tsv}'")
                            scans_table.loc[cefile_rel] = scans_table.loc['oldrow']

                        for echo in oldechos_rel + newechos_rel:
                            if echo in scans_table.index and not (session/echo).is_file():
                                LOGGER.info(f"Removing '{echo}' from '{scans_tsv}'")
                                scans_table.drop(echo, inplace=True)
                            elif echo not in scans_table.index and (session/echo).is_file() and echo.split('/')[0] in bids.bidsdatatypes:
                                LOGGER.info(f"Adding '{echo}' to '{scans_tsv}'")
                                scans_table.loc[echo] = scans_table.loc['oldrow']       # NB: Assuming that the echo-rows are all identical

                        scans_table.drop('oldrow', inplace=True)
                        scans_table.sort_values(by=['acq_time','filename'], inplace=True)
                        scans_table.replace('','n/a').to_csv(scans_tsv, sep='\t', encoding='utf-8', na_rep='n/a')
                        for scan in scans_table.index:
                            if not (session/scan).is_file():
                                LOGGER.warning(f"Found non-existent file '{scan}' in '{scans_tsv}'")

    LOGGER.info('-------------- FINISHED! -------------')
    LOGGER.info('')
Example #4
def medeface(bidsdir: str, pattern: str, maskpattern: str, subjects: list,
             force: bool, output: str, cluster: bool, nativespec: str,
             kwargs: dict):
    """

    :param bidsdir:     The bids-directory with the (multi-echo) subject data
    :param pattern:     Globlike search pattern (relative to the subject/session folder) to select the echo-images that need to be defaced, e.g. 'anat/*_T1w*'
    :param maskpattern: Globlike search pattern (relative to the subject/session folder) to select the images from which the defacemask is computed, e.g. 'anat/*_part-mag_*_T2starw*'. If not given then 'pattern' is used
    :param subjects:    List of sub-# identifiers to be processed (the sub- prefix can be left out). If not specified then all sub-folders in the bidsfolder will be processed
    :param force:       If True then images will be processed, regardless of whether they have already been defaced (i.e. if {"Defaced": True} is in the json sidecar file)
    :param output:      Determines where the defaced images are saved. It can be the name of a BIDS datatype folder, such as 'anat', or of the derivatives folder, i.e. 'derivatives'. If output is left empty then the original images are replaced by the defaced images
    :param cluster:     Flag to submit the deface jobs to the high-performance compute (HPC) cluster
    :param nativespec:  DRMAA native specifications for submitting deface jobs to the HPC cluster
    :param kwargs:      Additional arguments (in dict/json-style) that are passed to pydeface. See examples for usage
    :return:
    """

    # Input checking
    bidsdir = Path(bidsdir).resolve()
    if not maskpattern:
        maskpattern = pattern

    # Start logging
    bidscoin.setup_logging(bidsdir / 'code' / 'bidscoin' / 'deface.log')
    LOGGER.info('')
    LOGGER.info('------------ START multi-echo deface ----------')
    LOGGER.info(
        f">>> medeface bidsfolder={bidsdir} pattern={pattern} subjects={subjects} output={output}"
        f" cluster={cluster} nativespec={nativespec} {kwargs}")

    # Get the list of subjects
    if not subjects:
        subjects = bidscoin.lsdirs(bidsdir, 'sub-*')
        if not subjects:
            LOGGER.warning(f"No subjects found in: {bidsdir/'sub-*'}")
    else:
        subjects = ['sub-' + subject.replace('sub-', '') for subject in subjects]              # Make sure there is a "sub-" prefix
        subjects = [bidsdir / subject for subject in subjects if (bidsdir / subject).is_dir()]

    # Prepare the HPC pydeface job submission
    with drmaa.Session() as pbatch:
        if cluster:
            jt = pbatch.createJobTemplate()
            jt.jobEnvironment = os.environ
            jt.remoteCommand = shutil.which('pydeface')
            jt.nativeSpecification = nativespec
            jt.joinFiles = True

        # Loop over bids subject/session-directories to first get all the echo-combined deface masks
        for n, subject in enumerate(subjects, 1):

            sessions = bidscoin.lsdirs(subject, 'ses-*')
            if not sessions:
                sessions = [subject]
            for session in sessions:

                LOGGER.info('--------------------------------------')
                LOGGER.info(f"Processing ({n}/{len(subjects)}): {session}")

                datasource = bids.DataSource(session / 'dum.my',
                                             subprefix='sub-',
                                             sesprefix='ses-')
                subid, sesid = datasource.subid_sesid()

                # Read the echo-images that will be combined to compute the deface mask
                echofiles = sorted([
                    match for match in session.glob(maskpattern)
                    if '.nii' in match.suffixes
                ])
                if not echofiles:
                    LOGGER.info(
                        f'No mask files found for: {session}/{maskpattern}')
                    continue

                # Check the json "Defaced" field to see if it has already been defaced
                if not force:
                    with echofiles[0].with_suffix('').with_suffix('.json').open('r') as fid:
                        jsondata = json.load(fid)
                    if jsondata.get('Defaced'):
                        LOGGER.info(
                            f"Skipping already defaced images: {[str(echofile) for echofile in echofiles]}"
                        )
                        continue

                LOGGER.info(
                    f'Loading mask files: {[str(echofile) for echofile in echofiles]}'
                )
                echos = [nib.load(echofile) for echofile in echofiles]

                # Create a temporary echo-combined image
                tmpfile = session / 'tmp_echocombined_deface.nii'
                combined = nib.Nifti1Image(
                    np.mean([echo.get_fdata() for echo in echos], axis=0),
                    echos[0].affine, echos[0].header)
                combined.to_filename(tmpfile)

                # Deface the echo-combined image
                LOGGER.info(
                    f"Creating a deface-mask from the echo-combined image: {tmpfile}"
                )
                if cluster:
                    jt.args = [
                        str(tmpfile), '--outfile',
                        str(tmpfile), '--force'
                    ] + [
                        item for pair in [[f"--{key}", val]
                                          for key, val in kwargs.items()]
                        for item in pair
                    ]
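                    # e.g. (hypothetical) kwargs={'cost': 'mutualinfo'} is flattened to ['--cost', 'mutualinfo'] and appended to the pydeface arguments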
                    jt.jobName = f"pydeface_{subid}_{sesid}"
                    jobid = pbatch.runJob(jt)
                    LOGGER.info(
                        f"Your deface job has been submitted with ID: {jobid}")
                else:
                    pdu.deface_image(str(tmpfile),
                                     str(tmpfile),
                                     force=True,
                                     forcecleanup=True,
                                     **kwargs)

        if cluster:
            LOGGER.info('Waiting for the deface jobs to finish...')
            pbatch.synchronize(jobIds=[pbatch.JOB_IDS_SESSION_ALL],
                               timeout=pbatch.TIMEOUT_WAIT_FOREVER,
                               dispose=True)
            pbatch.deleteJobTemplate(jt)

    # Loop again over bids subject/session-directories to apply the deface masks and write meta-data
    with logging_redirect_tqdm():
        for n, subject in enumerate(
                tqdm(subjects, unit='subject', leave=False), 1):

            sessions = bidscoin.lsdirs(subject, 'ses-*')
            if not sessions:
                sessions = [subject]
            for session in sessions:

                LOGGER.info('--------------------------------------')
                LOGGER.info(f"Processing ({n}/{len(subjects)}): {session}")

                datasource = bids.DataSource(session / 'dum.my',
                                             subprefix='sub-',
                                             sesprefix='ses-')
                subid, sesid = datasource.subid_sesid()

                # Read the temporary defacemask
                tmpfile = session / 'tmp_echocombined_deface.nii'
                if not tmpfile.is_file():
                    LOGGER.info(f'No {tmpfile} file found')
                    continue
                defacemask = nib.load(tmpfile).get_fdata() != 0  # The original defacemask is saved in a temporary folder so it may be deleted -> use the defaced image to infer the mask
                tmpfile.unlink()

                # Process the echo-images that need to be defaced
                for echofile in sorted([
                        match for match in session.glob(pattern)
                        if '.nii' in match.suffixes
                ]):

                    # Construct the output filename and relative path name (used in BIDS)
                    echofile_rel = echofile.relative_to(session).as_posix()
                    if not output:
                        outputfile = echofile
                        outputfile_rel = echofile_rel
                    elif output == 'derivatives':
                        outputfile = bidsdir / 'derivatives' / 'deface' / subid / sesid / echofile.parent.name / echofile.name
                        outputfile_rel = outputfile.relative_to(
                            bidsdir).as_posix()
                    else:
                        outputfile = session / output / echofile.name
                        outputfile_rel = outputfile.relative_to(
                            session).as_posix()
                    outputfile.parent.mkdir(parents=True, exist_ok=True)

                    # Apply the defacemask
                    LOGGER.info(
                        f'Applying deface mask on: {echofile} -> {outputfile_rel}'
                    )
                    echoimg = nib.load(echofile)
                    outputimg = nib.Nifti1Image(
                        echoimg.get_fdata() * defacemask, echoimg.affine,
                        echoimg.header)
                    outputimg.to_filename(outputfile)

                    # Overwrite or add a json sidecar-file
                    inputjson = echofile.with_suffix('').with_suffix('.json')
                    outputjson = outputfile.with_suffix('').with_suffix('.json')
                    if inputjson.is_file() and inputjson != outputjson:
                        if outputjson.is_file():
                            LOGGER.info(
                                f"Overwriting the json sidecar-file: {outputjson}"
                            )
                            outputjson.unlink()
                        else:
                            LOGGER.info(
                                f"Adding a json sidecar-file: {outputjson}")
                        shutil.copyfile(inputjson, outputjson)

                    # Add a custom "Defaced" field to the json sidecar-file
                    with outputjson.open('r') as output_fid:
                        data = json.load(output_fid)
                    data['Defaced'] = True
                    with outputjson.open('w') as output_fid:
                        json.dump(data, output_fid, indent=4)

                    # Update the IntendedFor fields in the fieldmap sidecar-files NB: IntendedFor must be relative to the subject folder
                    if output and output != 'derivatives' and (
                            session / 'fmap').is_dir():
                        for fmap in (session / 'fmap').glob('*.json'):
                            with fmap.open('r') as fmap_fid:
                                fmap_data = json.load(fmap_fid)
                            intendedfor = fmap_data['IntendedFor']
                            if isinstance(intendedfor, str):
                                intendedfor = [intendedfor]
                            if (Path(sesid) /
                                    echofile_rel).as_posix() in intendedfor:
                                LOGGER.info(
                                    f"Updating 'IntendedFor' to {Path(sesid)/outputfile_rel} in {fmap}"
                                )
                                fmap_data['IntendedFor'] = intendedfor + [
                                    (Path(sesid) / outputfile_rel).as_posix()
                                ]
                                with fmap.open('w') as fmap_fid:
                                    json.dump(fmap_data, fmap_fid, indent=4)

                    # Update the scans.tsv file
                    if (bidsdir / '.bidsignore').is_file():
                        bidsignore = (bidsdir / '.bidsignore').read_text().splitlines()
                    else:
                        bidsignore = []
                    bidsignore.append('derivatives/')
                    scans_tsv = session / f"{subid}{bids.add_prefix('_',sesid)}_scans.tsv"
                    if output and output + '/' not in bidsignore and scans_tsv.is_file():
                        LOGGER.info(f"Adding {outputfile_rel} to {scans_tsv}")
                        scans_table = pd.read_csv(scans_tsv,
                                                  sep='\t',
                                                  index_col='filename')
                        scans_table.loc[outputfile_rel] = scans_table.loc[echofile_rel]
                        scans_table.sort_values(by=['acq_time', 'filename'],
                                                inplace=True)
                        scans_table.to_csv(scans_tsv,
                                           sep='\t',
                                           encoding='utf-8')

    LOGGER.info('-------------- FINISHED! -------------')
    LOGGER.info('')
def bidsmapper_plugin(session: Path, bidsmap_new: dict, bidsmap_old: dict,
                      template: dict, store: dict) -> None:
    """
    All the heuristics to map spec2nii2bids attributes and properties onto bids labels and meta-data go into this plugin function.
    The function is expected to update / append new runs to the bidsmap_new data structure. The bidsmap options for this plugin
    are stored in:

    bidsmap_new['Options']['plugins']['spec2nii2bids']

    :param session:     The full-path name of the subject/session raw data source folder
    :param bidsmap_new: The new study bidsmap that we are building
    :param bidsmap_old: The previous study bidsmap that has precedence over the template bidsmap
    :param template:    The template bidsmap with the default heuristics
    :param store:       The paths of the source- and target-folder
    :return:
    """

    # Get the plugin settings
    plugin = {
        'spec2nii2bids': bidsmap_new['Options']['plugins']['spec2nii2bids']
    }

    # Update the bidsmap with the info from the source files
    for sourcefile in [
            file for file in session.rglob('*') if is_sourcefile(file)
    ]:

        datasource = bids.DataSource(sourcefile, plugin)
        dataformat = datasource.dataformat

        # Input checks
        if not template.get(dataformat) and not bidsmap_old.get(dataformat):
            LOGGER.error(f"No {dataformat} source information found in the bidsmap and template for: {sourcefile}")
            return

        # See if we can find a matching run in the old bidsmap
        run, match = bids.get_matching_run(datasource, bidsmap_old)

        # If not, see if we can find a matching run in the template
        if not match:
            run, _ = bids.get_matching_run(datasource, template)

        # See if we have collected the run somewhere in our new bidsmap
        if not bids.exist_run(bidsmap_new, '', run):

            # Communicate with the user if the run was not present in bidsmap_old or in template, i.e. that we found a new sample
            if not match:
                LOGGER.info(
                    f"Discovered '{datasource.datatype}' {dataformat} sample: {sourcefile}"
                )

            # Now work from the provenance store
            if store:
                targetfile = store['target'] / sourcefile.relative_to(
                    store['source'])
                targetfile.parent.mkdir(parents=True, exist_ok=True)
                run['provenance'] = str(shutil.copy2(sourcefile, targetfile))

            # Copy the filled-in run over to the new bidsmap
            bids.append_run(bidsmap_new, run)

        else:
            # Communicate with the user if the run was already present in bidsmap_old or in template
            LOGGER.debug(
                f"Known '{datasource.datatype}' {dataformat} sample: {sourcefile}"
            )
def bidscoiner_plugin(session: Path, bidsmap: dict, bidsses: Path) -> None:
    """
    This wrapper function around spec2nii converts the MRS data in the session folder and saves it in the bidsfolder.
    Each saved datafile should be accompanied by a json sidecar file. The bidsmap options for this plugin can be found in:

    bidsmap_new['Options']['plugins']['spec2nii2bids']

    :param session:     The full-path name of the subject/session raw data source folder
    :param bidsmap:     The full mapping heuristics from the bidsmap YAML-file
    :param bidsses:     The full-path name of the BIDS output `sub-/ses-` folder
    :return:            Nothing
    """

    # Get the subject identifiers and the BIDS root folder from the bidsses folder
    if bidsses.name.startswith('ses-'):
        bidsfolder = bidsses.parent.parent
        subid = bidsses.parent.name
        sesid = bidsses.name
    else:
        bidsfolder = bidsses.parent
        subid = bidsses.name
        sesid = ''

    # Get started and see what dataformat we have
    options = bidsmap['Options']['plugins']['spec2nii2bids']
    datasource = bids.get_datasource(session, {'spec2nii2bids': options})
    dataformat = datasource.dataformat
    sourcefiles = [file for file in session.rglob('*') if is_sourcefile(file)]
    if not sourcefiles:
        LOGGER.info(f"No {__name__} sourcedata found in: {session}")
        return

    # Read or create a scans_table and tsv-file
    scans_tsv = bidsses / f"{subid}{bids.add_prefix('_',sesid)}_scans.tsv"
    if scans_tsv.is_file():
        scans_table = pd.read_csv(scans_tsv, sep='\t', index_col='filename')
    else:
        scans_table = pd.DataFrame(columns=['acq_time'], dtype='str')
        scans_table.index.name = 'filename'

    # Loop over all MRS source data files and convert them to BIDS
    for sourcefile in sourcefiles:

        # Get a data source, a matching run from the bidsmap
        datasource = bids.DataSource(sourcefile, {'spec2nii2bids': options})
        run, index = bids.get_matching_run(datasource, bidsmap, runtime=True)

        # Check if we should ignore this run
        if datasource.datatype in bidsmap['Options']['bidscoin'][
                'ignoretypes']:
            LOGGER.info(f"Leaving out: {sourcefile}")
            continue

        # Check that we know this run
        if index is None:
            LOGGER.error(
                f"Skipping unknown '{datasource.datatype}' run: {sourcefile}\n-> Re-run the bidsmapper and delete the MRS output data in {bidsses} to solve this warning"
            )
            continue

        LOGGER.info(f"Processing: {sourcefile}")

        # Create the BIDS session/datatype output folder
        outfolder = bidsses / datasource.datatype
        outfolder.mkdir(parents=True, exist_ok=True)

        # Compose the BIDS filename using the matched run
        bidsname = bids.get_bidsname(subid, sesid, run, runtime=True)
        runindex = run['bids'].get('run', '')
        if runindex.startswith('<<') and runindex.endswith('>>'):
            bidsname = bids.increment_runindex(outfolder, bidsname)
        jsonfile = (outfolder / bidsname).with_suffix('.json')
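        # Illustration (hypothetical names): a run-index of '<<1>>' in the bidsmap is
        # dynamic, so if 'sub-01_acq-press_run-1_svs' already exists in the output
        # folder, bids.increment_runindex() bumps the run-index of the composed
        # bidsname (e.g. to 'sub-01_acq-press_run-2_svs'); a static run-index such as
        # '1' is used as-is.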

        # Check if file already exists (-> e.g. when a static runindex is used)
        if jsonfile.is_file():
            LOGGER.warning(
                f"{outfolder/bidsname}.* already exists and will be deleted -- check your results carefully!"
            )
            for ext in ('.nii.gz', '.nii', '.json', '.bval', '.bvec',
                        '.tsv.gz'):
                (outfolder / bidsname).with_suffix(ext).unlink(missing_ok=True)

        # Run spec2nii to convert the source-files in the run folder to nifti's in the BIDS-folder
        arg = ''
        args = options.get('args', OPTIONS['args'])
        if args is None:
            args = ''
        if dataformat == 'SPAR':
            dformat = 'philips'
            arg = f'"{sourcefile.with_suffix(".SDAT")}"'
        elif dataformat == 'Twix':
            dformat = 'twix'
            arg = '-e image'
        elif dataformat == 'Pfile':
            dformat = 'ge'
        else:
            LOGGER.exception(f"Unsupported dataformat: {dataformat}")
            continue
        command = options.get("command", "spec2nii")
        if not bidscoin.run_command(
                f'{command} {dformat} -j -f "{bidsname}" -o "{outfolder}" {args} {arg} "{sourcefile}"'
        ):
            if not list(outfolder.glob(f"{bidsname}.nii*")): continue
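        # Illustration (hypothetical filenames): for Siemens Twix data the assembled
        # command line looks roughly like
        #   spec2nii twix -j -f "sub-01_acq-press_svs" -o "/bids/sub-01/mrs" -e image "/raw/sub-01/meas_MID00123.dat"
        # whereas for Philips SPAR data the matching .SDAT file is passed as the extra argument.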

        # Load and adapt the newly produced json sidecar-file (NB: assumes every nifti-file comes with a json-file)
        with jsonfile.open('r') as json_fid:
            jsondata = json.load(json_fid)

        # Copy over the source meta-data
        metadata = bids.copymetadata(sourcefile, outfolder / bidsname,
                                     options.get('meta', []))
        for metakey, metaval in metadata.items():
            if metakey in jsondata and jsondata[metakey] != metaval:
                LOGGER.warning(
                    f"Replacing {metakey} values in {jsonfile}: {jsondata[metakey]} -> {metaval}"
                )
            jsondata[metakey] = metaval

        # Add all the meta data to the json-file
        for metakey, metaval in run['meta'].items():
            metaval = datasource.dynamicvalue(metaval,
                                              cleanup=False,
                                              runtime=True)
            try:
                metaval = ast.literal_eval(str(metaval))
            except (ValueError, SyntaxError):
                pass
            LOGGER.info(f"Adding '{metakey}: {metaval}' to: {jsonfile}")
            if not metaval:
                metaval = None
            jsondata[metakey] = metaval

        # Save the meta data to disk
        with jsonfile.open('w') as json_fid:
            json.dump(jsondata, json_fid, indent=4)

        # Parse the acquisition time from the source header or else from the json file (NB: assuming the source file represents the first acquisition)
        if datasource.datatype not in bidsmap['Options']['bidscoin'][
                'bidsignore'] and run['bids'][
                    'suffix'] not in bids.get_derivatives(datasource.datatype):
            acq_time = ''
            if dataformat == 'SPAR':
                acq_time = datasource.attributes('scan_date')
            elif dataformat == 'Twix':
                acq_time = f"{datasource.attributes('AcquisitionDate')}T{datasource.attributes('AcquisitionTime')}"
            elif dataformat == 'Pfile':
                acq_time = f"{datasource.attributes('rhr_rh_scan_date')}T{datasource.attributes('rhr_rh_scan_time')}"
            if not acq_time or acq_time == 'T':
                acq_time = f"1925-01-01T{jsondata.get('AcquisitionTime','')}"
            try:
                acq_time = dateutil.parser.parse(acq_time)
                if options.get('anon', OPTIONS['anon']) in ('y', 'yes'):
                    acq_time = acq_time.replace(
                        year=1925, month=1,
                        day=1)  # Privacy protection (see BIDS specification)
                acq_time = acq_time.isoformat()
            except Exception as jsonerror:
                LOGGER.warning(
                    f"Could not parse the acquisition time from: {sourcefile}\n{jsonerror}"
                )
                acq_time = 'n/a'
            scans_table.loc[jsonfile.with_suffix('.nii.gz').
                            relative_to(bidsses).as_posix(),
                            'acq_time'] = acq_time
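        # Illustration: with the 'anon' option enabled, an acquisition time such as
        # '2021-05-06T10:14:33' is stored in the scans.tsv table as
        # '1925-01-01T10:14:33', i.e. only the time of day is kept.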

    # Write the scans_table to disk
    LOGGER.info(f"Writing acquisition time data to: {scans_tsv}")
    scans_table.sort_values(by=['acq_time', 'filename'], inplace=True)
    scans_table.replace('', 'n/a').to_csv(scans_tsv,
                                          sep='\t',
                                          encoding='utf-8',
                                          na_rep='n/a')

    # Collect personal data from a source header
    personals = {}
    if sesid and 'session_id' not in personals:
        personals['session_id'] = sesid
    age = ''
    if dataformat == 'Twix':
        personals['sex'] = datasource.attributes('PatientSex')
        personals['size'] = datasource.attributes('PatientSize')
        personals['weight'] = datasource.attributes('PatientWeight')
        age = datasource.attributes(
            'PatientAge'
        )  # A string of characters with one of the following formats: nnnD, nnnW, nnnM, nnnY
    elif dataformat == 'Pfile':
        sex = datasource.attributes('rhe_patsex')
        if sex == '0': personals['sex'] = 'O'
        elif sex == '1': personals['sex'] = 'M'
        elif sex == '2': personals['sex'] = 'F'
        age = dateutil.parser.parse(
            datasource.attributes('rhr_rh_scan_date')) - dateutil.parser.parse(
                datasource.attributes('rhe_dateofbirth'))
        age = str(age.days) + 'D'
    if age.endswith('D'): age = float(age.rstrip('D')) / 365.2524
    elif age.endswith('W'): age = float(age.rstrip('W')) / 52.1775
    elif age.endswith('M'): age = float(age.rstrip('M')) / 12
    elif age.endswith('Y'): age = float(age.rstrip('Y'))
    if age and options.get('anon', OPTIONS['anon']) in ('y', 'yes'):
        age = int(float(age))
    personals['age'] = str(age)
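    # Illustration (hypothetical values): a DICOM-style age string of '030Y' yields
    # 30.0 years, '365D' roughly 1.0 and '026W' roughly 0.5; with the 'anon' option
    # enabled the age is truncated to a whole number of years before it is stored.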

    # Store the collected personals in the participants_table
    participants_tsv = bidsfolder / 'participants.tsv'
    if participants_tsv.is_file():
        participants_table = pd.read_csv(participants_tsv, sep='\t', dtype=str)
        participants_table.set_index(['participant_id'],
                                     verify_integrity=True,
                                     inplace=True)
    else:
        participants_table = pd.DataFrame()
        participants_table.index.name = 'participant_id'
    if subid in participants_table.index and 'session_id' in participants_table.keys(
    ) and participants_table.loc[subid, 'session_id']:
        return  # Only take data from the first session -> BIDS specification
    for key in personals:  # TODO: Check that only values that are consistent over sessions go in the participants.tsv file, otherwise put them in a sessions.tsv file
        if key not in participants_table or participants_table[key].isnull(
        ).get(subid, True) or participants_table[key].get(subid) == 'n/a':
            participants_table.loc[subid, key] = personals[key]

    # Write the collected data to the participants tsv-file
    LOGGER.info(f"Writing {subid} subject data to: {participants_tsv}")
    participants_table.replace('', 'n/a').to_csv(participants_tsv,
                                                 sep='\t',
                                                 encoding='utf-8',
                                                 na_rep='n/a')
Beispiel #7
0
def bidsparticipants(rawfolder: str, bidsfolder: str, keys: str, bidsmapfile: str='bidsmap.yaml', dryrun: bool=False) -> None:
    """
    Main function that processes all the subjects and sessions in the sourcefolder to (re)generate the participants.tsv file in the BIDS folder.

    :param rawfolder:       The root folder-name of the sub/ses/data/file tree containing the source data files
    :param bidsfolder:      The name of the BIDS root folder
    :param keys:            The keys that are extracted from the source data when populating the participants.tsv file
    :param bidsmapfile:     The name of the bidsmap YAML-file. If the bidsmap pathname is relative (i.e. no "/" in the name) then it is assumed to be located in bidsfolder/code/bidscoin
    :param dryrun:          Boolean to just display the participants info
    :return:                Nothing
    """

    # Input checking & defaults
    rawfolder  = Path(rawfolder).resolve()
    bidsfolder = Path(bidsfolder).resolve()

    # Start logging
    if dryrun:
        bidscoin.setup_logging()
    else:
        bidscoin.setup_logging(bidsfolder/'code'/'bidscoin'/'bidsparticipants.log')
    LOGGER.info('')
    LOGGER.info(f"-------------- START bidsparticipants {bidscoin.version()} ------------")
    LOGGER.info(f">>> bidsparticipants sourcefolder={rawfolder} bidsfolder={bidsfolder} bidsmap={bidsmapfile}")

    # Get the bidsmap sub-/ses-prefix from the bidsmap YAML-file
    bidsmap,_ = bids.load_bidsmap(Path(bidsmapfile), bidsfolder/'code'/'bidscoin')
    subprefix = bidsmap['Options']['bidscoin']['subprefix']
    sesprefix = bidsmap['Options']['bidscoin']['sesprefix']

    # Get the table & dictionary of the subjects that have been processed
    participants_tsv  = bidsfolder/'participants.tsv'
    participants_json = participants_tsv.with_suffix('.json')
    if participants_tsv.is_file():
        participants_table = pd.read_csv(participants_tsv, sep='\t')
        participants_table.set_index(['participant_id'], verify_integrity=True, inplace=True)
    else:
        participants_table = pd.DataFrame()
        participants_table.index.name = 'participant_id'
    if participants_json.is_file():
        with participants_json.open('r') as json_fid:
            participants_dict = json.load(json_fid)
    else:
        participants_dict = {'participant_id': {'Description': 'Unique participant identifier'}}

    # Get the list of subjects
    subjects = bidscoin.lsdirs(bidsfolder, 'sub-*')
    if not subjects:
        LOGGER.warning(f"No subjects found in: {bidsfolder}")

    # Remove obsolete participants from the participants table
    for participant in participants_table.index:
        if participant not in [sub.name for sub in subjects]:
            participants_table = participants_table.drop(participant)

    # Loop over all subjects in the bids-folder and add them to the participants table
    with logging_redirect_tqdm():
        for n, subject in enumerate(tqdm(subjects, unit='subject', leave=False), 1):

            LOGGER.info(f"------------------- Subject {n}/{len(subjects)} -------------------")
            personals = dict()
            subject   = rawfolder/subject.name.replace('sub-', subprefix.replace('*',''))     # TODO: This assumes e.g. that the subject-ids in the rawfolder did not contain BIDS-invalid characters (such as '_')
            sessions  = bidscoin.lsdirs(subject, (sesprefix if sesprefix!='*' else '') + '*')
            if not subject.is_dir():
                LOGGER.error(f"Could not find source-folder: {subject}")
                continue
            if not sessions:
                sessions = [subject]
            for session in sessions:

                # Only take data from the first session -> BIDS specification
                subid, sesid = bids.DataSource(session/'dum.my', subprefix='sub-', sesprefix='ses-').subid_sesid()
                if sesprefix and sesid and 'session_id' not in personals:
                    personals['session_id']         = sesid
                    participants_dict['session_id'] = {'Description': 'Session identifier'}

                # Unpack the data in a temporary folder if it is tarballed/zipped and/or contains a DICOMDIR file
                sesfolders, unpacked = bids.unpack(session)
                for sesfolder in sesfolders:

                    # Update / append the personal source data
                    LOGGER.info(f"Scanning session: {sesfolder}")
                    success = scanpersonals(bidsmap, sesfolder, personals)

                    # Clean-up the temporary unpacked data
                    if unpacked:
                        shutil.rmtree(sesfolder)

                    if success:
                        break

            # Store the collected personals in the participant_table. TODO: Check that only values that are consistent over sessions go in the participants.tsv file, otherwise put them in a sessions.tsv file
            for key in keys:
                if key not in participants_dict:
                    participants_dict[key] = dict(LongName    = 'Long (unabbreviated) name of the column',
                                                  Description = 'Description of the column',
                                                  Levels      = dict(Key='Value (This is for categorical variables: a dictionary of possible values (keys) and their descriptions (values))'),
                                                  Units       = 'Measurement units. [<prefix symbol>]<unit symbol> format following the SI standard is RECOMMENDED')

                participants_table.loc[subid, key] = personals.get(key)

    # Write the collected data to the participant files
    LOGGER.info(f"Writing subject data to: {participants_tsv}")
    if not dryrun:
        participants_table.replace('','n/a').to_csv(participants_tsv, sep='\t', encoding='utf-8', na_rep='n/a')

    LOGGER.info(f"Writing subject data dictionary to: {participants_json}")
    if not dryrun:
        with participants_json.open('w') as json_fid:
            json.dump(participants_dict, json_fid, indent=4)

    print(participants_table)

    LOGGER.info('-------------- FINISHED! ------------')
    LOGGER.info('')

    bidscoin.reporterrors()
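
# Illustrative usage sketch (hypothetical folder names): regenerate participants.tsv
# from the 'age' and 'sex' source attributes, printing the collected table instead of
# writing the participant files (dryrun=True).
#
# bidsparticipants(rawfolder='/project/raw', bidsfolder='/project/bids',
#                  keys=['age', 'sex'], bidsmapfile='bidsmap.yaml', dryrun=True)
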
Beispiel #8
0
def bidscoiner_plugin(session: Path, bidsmap: dict, bidsses: Path) -> None:
    """
    This wrapper function around phys2bids converts the physio data in the session folder and saves it in the bidsfolder.
    Each saved datafile should be accompanied by a json sidecar file. The bidsmap options for this plugin can be found in:

    bidsmap['Options']['plugins']['phys2bidscoin']

    See also the dcm2niix2bids plugin for reference implementation

    :param session:     The full-path name of the subject/session raw data source folder
    :param bidsmap:     The full mapping heuristics from the bidsmap YAML-file
    :param bidsses:     The full-path name of the BIDS output `sub-/ses-` folder
    :return:            Nothing
    """

    # Get the subject identifiers and the BIDS root folder from the bidsses folder
    if bidsses.name.startswith('ses-'):
        bidsfolder = bidsses.parent.parent
        subid = bidsses.parent.name
        sesid = bidsses.name
    else:
        bidsfolder = bidsses.parent
        subid = bidsses.name
        sesid = ''

    # Get started
    plugin = {'phys2bidscoin': bidsmap['Options']['plugins']['phys2bidscoin']}
    datasource = bids.get_datasource(session, plugin)
    sourcefiles = [file for file in session.rglob('*') if is_sourcefile(file)]
    if not sourcefiles:
        LOGGER.info(f"No {__name__} sourcedata found in: {session}")
        return

    # Loop over all source data files and convert them to BIDS
    for sourcefile in sourcefiles:

        # Get a data source, a matching run from the bidsmap
        datasource = bids.DataSource(sourcefile, plugin, datasource.dataformat)
        run, match = bids.get_matching_run(datasource, bidsmap, runtime=True)

        # Check if we should ignore this run
        if datasource.datatype in bidsmap['Options']['bidscoin'][
                'ignoretypes']:
            LOGGER.info(f"Leaving out: {sourcefile}")
            continue

        # Check that we know this run
        if not match:
            LOGGER.error(
                f"Skipping unknown '{datasource.datatype}' run: {sourcefile}\n-> Re-run the bidsmapper and delete the physiological output data in {bidsses} to solve this warning"
            )
            continue

        LOGGER.info(f"Processing: {sourcefile}")

        # Get an ordered list of the func runs from the scans.tsv file (which should have a standardized datetime format)
        scans_tsv = bidsses / f"{subid}{bids.add_prefix('_', sesid)}_scans.tsv"
        if scans_tsv.is_file():
            scans_table = pd.read_csv(scans_tsv,
                                      sep='\t',
                                      index_col='filename')
            scans_table.sort_values(by=['acq_time', 'filename'], inplace=True)
        else:
            LOGGER.error(
                f"Could not read the TR's for phys2bids due to a missing '{scans_tsv}' file"
            )
            continue
        funcscans = []
        for index, row in scans_table.iterrows():
            if index.startswith('func/'):
                funcscans.append(index)

        # Then read the TRs from the associated func sidecar files
        tr = []
        for funcscan in funcscans:
            with (bidsses /
                  funcscan).with_suffix('.json').open('r') as json_fid:
                jsondata = json.load(json_fid)
            tr.append(jsondata['RepetitionTime'])
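        # Illustration (hypothetical session): if scans.tsv lists
        # 'func/sub-01_task-rest_bold.nii.gz' and that sidecar contains
        # "RepetitionTime": 2.0, then funcscans == ['func/sub-01_task-rest_bold.nii.gz']
        # and tr == [2.0], which is passed on to phys2bids below.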

        # Create a heuristic function for phys2bids
        heur_str = ('def heur(physinfo, run=""):\n'
                    '    info = {}\n'
                    f'    if physinfo == "{sourcefile.name}":')
        for key, val in run['bids'].items():
            heur_str = (f'{heur_str}' f'\n        info["{key}"] = "{val}"')
        heur_str = f'{heur_str}\n    return info'

        # Write heuristic function as file in temporary folder
        heur_file = Path(
            tempfile.mkdtemp()) / f'heuristic_sub-{subid}_ses-{sesid}.py'
        heur_file.write_text(heur_str)
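        # Illustration (hypothetical run entities): the generated heuristic file would
        # look roughly like
        #   def heur(physinfo, run=""):
        #       info = {}
        #       if physinfo == "sub-01_physio.log":
        #           info["task"] = "rest"
        #           info["suffix"] = "physio"
        #       return info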

        # Run phys2bids
        physiofiles = phys2bids(
            filename=str(sourcefile),
            outdir=str(bidsfolder),
            heur_file=str(heur_file),
            sub=subid,
            ses=sesid,
            chtrig=int(run['meta'].get('TriggerChannel', 0)),
            num_timepoints_expected=run['meta'].get('ExpectedTimepoints',
                                                    None),
            tr=tr,
            pad=run['meta'].get('Pad', 9),
            ch_name=run['meta'].get('ChannelNames', []),
            yml='',
            debug=True,
            quiet=False)

        # Add user-specified meta-data to the newly produced json files (NB: assumes every physio-file comes with a json-file)
        for physiofile in physiofiles:
            jsonfile = Path(physiofile).with_suffix('.json')
            if not jsonfile.is_file():
                LOGGER.error(
                    f"Could not find the expected json sidecar-file: '{jsonfile}'"
                )
                continue
            with jsonfile.open('r') as json_fid:
                jsondata = json.load(json_fid)
            for metakey, metaval in run['meta'].items():
                metaval = datasource.dynamicvalue(metaval,
                                                  cleanup=False,
                                                  runtime=True)
                try:
                    metaval = ast.literal_eval(str(metaval))
                except (ValueError, SyntaxError):
                    pass
                LOGGER.info(f"Adding '{metakey}: {metaval}' to: {jsonfile}")
                if not metaval:
                    metaval = None
                jsondata[metakey] = metaval
            with jsonfile.open('w') as json_fid:
                json.dump(jsondata, json_fid, indent=4)
def rawmapper(rawfolder,
              outfolder: Path = Path(),
              sessions: tuple = (),
              rename: bool = False,
              force: bool = False,
              dicomfield: tuple = ('PatientComments', ),
              wildcard: str = '*',
              subprefix: str = 'sub-',
              sesprefix: str = 'ses-',
              dryrun: bool = False) -> None:
    """
    :param rawfolder:   The root folder-name of the sub/ses/data/file tree containing the source data files
    :param outfolder:   The name of the folder where the mapping-file is saved (default = sourcefolder)
    :param sessions:    Space separated list of selected sub-#/ses-# names / folders to be processed. Otherwise all sessions in the bidsfolder will be selected
    :param rename:      Flag for renaming the sub-subid folders to sub-dicomfield
    :param force:       Flag to rename the directories, even if the target-directory already exists
    :param dicomfield:  The names of the dicomfields that are mapped (/ renamed to sub-dcmval/ses-dcmval)
    :param wildcard:    The Unix style pathname pattern expansion that is used by glob to select the series from which the dicomfield is being mapped
    :param subprefix:   The prefix common for all source subject-folders
    :param sesprefix:   The prefix common for all source session-folders
    :param dryrun:      Flag for dry-running renaming the sub-subid folders
    :return:            Nothing
    """

    # Input checking
    rawfolder = Path(rawfolder).resolve()
    if not outfolder:
        outfolder = rawfolder
        print(f"Outfolder: {outfolder}")
    outfolder = Path(outfolder).resolve()

    # Write the header of the mapper logfile
    mapperfile = outfolder / f"rawmapper_{'_'.join(dicomfield)}.tsv"
    if not dryrun:
        if rename and not mapperfile.is_file():  # Write the header once
            mapperfile.write_text('subid\tsesid\tnewsubid\tnewsesid\n')
        else:  # Write the header once
            mapperfile.write_text('subid\tsesid\tseriesname\t{}\n'.format(
                '\t'.join(dicomfield)))

    # Map the sessions in the sourcefolder
    if not sessions:
        sessions = list(rawfolder.glob(f"{subprefix}*/{sesprefix}*"))
        if not sessions:
            sessions = list(rawfolder.glob(
                f"{subprefix}*"))  # Try without session-subfolders
    else:
        sessions = [
            sessionitem for session in sessions
            for sessionitem in rawfolder.glob(session)
        ]
    sessions = [session for session in sessions if session.is_dir()]

    # Loop over the selected sessions in the sourcefolder
    for session in sessions:

        # Get the subject and session identifiers from the sub/ses session folder
        datasource = bids.DataSource(session / 'dum.my',
                                     subprefix=subprefix,
                                     sesprefix=sesprefix)
        subid, sesid = datasource.subid_sesid()
        subid = subid.replace('sub-', subprefix)
        sesid = sesid.replace('ses-', sesprefix)

        # Parse the new subject and session identifiers from the dicomfield
        series = bidscoin.lsdirs(session, wildcard)
        if not series:
            series = ''
            dcmval = ''
        else:
            series = series[
                0]  # NB: Assumes the first folder contains a dicom file and that all folders give the same info
            dcmval = ''
            for dcmfield in dicomfield:
                dcmval = dcmval + '/' + str(
                    bids.get_dicomfield(dcmfield, bids.get_dicomfile(series)))
            dcmval = dcmval[1:]

        # Rename the session subfolder in the sourcefolder and print & save this info
        if rename:

            # Get the new subid and sesid
            if not dcmval or dcmval == 'None':
                warnings.warn(
                    f"Skipping renaming because the dicom-field was empty for: {session}"
                )
                continue
            else:
                if '/' in dcmval:  # Allow for different sub/ses delimiters that could be entered at the console (i.e. in PatientComments)
                    delim = '/'
                elif '\\' in dcmval:
                    delim = '\\'
                else:
                    delim = '\r\n'
                newsubsesid = [val for val in dcmval.split(delim)
                               if val]  # Skip empty lines / entries
                newsubid = subprefix + bids.cleanup_value(
                    re.sub(f'^{subprefix}', '', newsubsesid[0]))
                if newsubid == subprefix or newsubid == subprefix + 'None':
                    newsubid = subid
                    warnings.warn(
                        f"Could not rename {subid} because the dicom-field was empty for: {session}"
                    )
                if len(newsubsesid) == 1:
                    newsesid = sesid
                elif len(newsubsesid) == 2:
                    newsesid = sesprefix + bids.cleanup_value(
                        re.sub(f'^{sesprefix}', '', newsubsesid[1]))
                else:
                    warnings.warn(
                        f"Skipping renaming of {session} because the dicom-field '{dcmval}' could not be parsed into [subid, sesid]"
                    )
                    continue
                if newsesid == sesprefix or newsesid == sesprefix + 'None':
                    newsesid = sesid
                    warnings.warn(
                        f"Could not rename {sesid} because the dicom-field was empty for: {session}"
                    )
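            # Illustration (hypothetical value): a PatientComments entry of 'M021/retest'
            # is split on '/' into ['M021', 'retest'] and, with the default prefixes,
            # mapped to newsubid='sub-M021' and newsesid='ses-retest'.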

            # Save the dicomfield / sub-ses mapping in the mapper logfile and rename the session subfolder (but skip if it already exists)
            newsession = rawfolder / newsubid / newsesid
            print(f"{session} -> {newsession}")
            if newsession == session:
                continue
            if not force and (newsession.is_dir() or newsession.is_file()):
                warnings.warn(
                    f"{newsession} already exists, skipping renaming of {session} (you can use the '-c' option to override this)"
                )
            elif not dryrun:
                with mapperfile.open('a') as fid:
                    fid.write(f"{subid}\t{sesid}\t{newsubid}\t{newsesid}\n")
                if newsession.is_dir():
                    for item in session.iterdir():
                        shutil.move(item, newsession / item.name)
                    session.rmdir()
                else:
                    shutil.move(session, newsession)

        # Print & save the dicom values in the mapper logfile
        else:
            print('{}/{}/{}\t-> {}'.format(subid, sesid, series.name,
                                           '\t'.join(dcmval.split('/'))))
            if not dryrun:
                with mapperfile.open('a') as fid:
                    fid.write('{}\t{}\t{}\t{}\n'.format(
                        subid, sesid, series.name,
                        '\t'.join(dcmval.split('/'))))
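
# Illustrative usage sketch (hypothetical paths): first inspect the PatientComments
# values with a dry-run, then rename the source sub-/ses-folders accordingly.
#
# rawmapper('/project/raw', dicomfield=('PatientComments',), dryrun=True)
# rawmapper('/project/raw', dicomfield=('PatientComments',), rename=True)
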
def bidsmapper_plugin(session: Path, bidsmap_new: dict, bidsmap_old: dict,
                      template: dict, store: dict) -> None:
    """
    All the logic to map the Nibabel header fields onto bids labels goes into this function

    :param session:     The full-path name of the subject/session raw data source folder
    :param bidsmap_new: The new study bidsmap that we are building
    :param bidsmap_old: The previous study bidsmap that has precedence over the template bidsmap
    :param template:    The template bidsmap with the default heuristics
    :param store:       The paths of the source- and target-folder
    :return:            Nothing
    """

    # Get started
    plugin = {
        'nibabel2bids': bidsmap_new['Options']['plugins']['nibabel2bids']
    }
    datasource = bids.get_datasource(session, plugin, recurse=2)
    if not datasource.dataformat:
        return
    if not (template.get(datasource.dataformat)
            or bidsmap_old.get(datasource.dataformat)):
        LOGGER.error(
            f"No {datasource.dataformat} source information found in the bidsmap and template"
        )
        return

    # Collect the different DICOM/PAR source files for all runs in the session
    for sourcefile in [
            file for file in session.rglob('*') if is_sourcefile(file)
    ]:

        # See if we can find a matching run in the old bidsmap
        datasource = bids.DataSource(sourcefile, plugin, datasource.dataformat)
        run, match = bids.get_matching_run(datasource, bidsmap_old)

        # If not, see if we can find a matching run in the template
        if not match:
            run, _ = bids.get_matching_run(datasource, template)

        # See if we have collected the run somewhere in our new bidsmap
        if not bids.exist_run(bidsmap_new, '', run):

            # Communicate with the user if the run was not present in bidsmap_old or in template, i.e. that we found a new sample
            if not match:
                LOGGER.info(
                    f"Discovered '{datasource.datatype}' {datasource.dataformat} sample: {sourcefile}"
                )

            # Now work from the provenance store
            if store:
                targetfile = store['target'] / sourcefile.relative_to(
                    store['source'])
                targetfile.parent.mkdir(parents=True, exist_ok=True)
                run['provenance'] = str(shutil.copy2(sourcefile, targetfile))

            # Copy the filled-in run over to the new bidsmap
            bids.append_run(bidsmap_new, run)

        else:
            # Communicate with the user if the run was already present in bidsmap_old or in template
            LOGGER.debug(
                f"Known '{datasource.datatype}' {datasource.dataformat} sample: {sourcefile}"
            )
def bidscoiner_plugin(session: Path, bidsmap: dict, bidsses: Path) -> None:
    """
    The bidscoiner plugin to convert the session Nibabel source-files into BIDS-valid nifti-files in the
    corresponding bids session-folder

    :param session:     The full-path name of the subject/session source folder
    :param bidsmap:     The full mapping heuristics from the bidsmap YAML-file
    :param bidsses:     The full-path name of the BIDS output `sub-/ses-` folder
    :return:            Nothing
    """

    # Get the subject identifiers and the BIDS root folder from the bidsses folder
    if bidsses.name.startswith('ses-'):
        bidsfolder = bidsses.parent.parent
        subid = bidsses.parent.name
        sesid = bidsses.name
    else:
        bidsfolder = bidsses.parent
        subid = bidsses.name
        sesid = ''

    # Get started
    options = bidsmap['Options']['plugins']['nibabel2bids']
    ext = options.get('ext', OPTIONS['ext'])
    meta = options.get('meta', [])
    sourcefiles = [file for file in session.rglob('*') if is_sourcefile(file)]
    if not sourcefiles:
        LOGGER.info(f"No {__name__} sourcedata found in: {session}")
        return

    # Read or create a scans_table and tsv-file
    scans_tsv = bidsses / f"{subid}{bids.add_prefix('_', sesid)}_scans.tsv"
    if scans_tsv.is_file():
        scans_table = pd.read_csv(scans_tsv, sep='\t', index_col='filename')
    else:
        scans_table = pd.DataFrame(columns=['acq_time'], dtype='str')
        scans_table.index.name = 'filename'

    # Collect the different Nibabel source files for all files in the session
    for sourcefile in sourcefiles:

        datasource = bids.DataSource(sourcefile, {'nibabel2bids': options})
        run, match = bids.get_matching_run(datasource, bidsmap, runtime=True)

        # Check if we should ignore this run
        if datasource.datatype in bidsmap['Options']['bidscoin'][
                'ignoretypes']:
            LOGGER.info(f"Leaving out: {sourcefile}")
            continue

        # Check if we already know this run
        if not match:
            LOGGER.error(
                f"Skipping unknown '{datasource.datatype}' run: {sourcefile}\n-> Re-run the bidsmapper and delete {bidsses} to solve this warning"
            )
            continue

        LOGGER.info(f"Processing: {sourcefile}")

        # Create the BIDS session/datatype output folder
        outfolder = bidsses / datasource.datatype
        outfolder.mkdir(parents=True, exist_ok=True)

        # Compose the BIDS filename using the matched run
        bidsname = bids.get_bidsname(subid, sesid, run, runtime=True)
        runindex = run['bids'].get('run', '')
        if runindex.startswith('<<') and runindex.endswith('>>'):
            bidsname = bids.increment_runindex(outfolder, bidsname)
        bidsfile = (outfolder / bidsname).with_suffix(ext)

        # Check if file already exists (-> e.g. when a static runindex is used)
        if bidsfile.is_file():
            LOGGER.warning(
                f"{bidsfile} already exists and will be deleted -- check your results carefully!"
            )
            bidsfile.unlink()

        # Save the sourcefile as a BIDS nifti file
        nib.save(nib.load(sourcefile), bidsfile)

        # Copy over the source meta-data
        jsonfile = bidsfile.with_suffix('').with_suffix('.json')
        jsondata = bids.copymetadata(sourcefile, bidsfile, meta)

        # Add all the run meta-data to the json data. NB: the dynamic `IntendedFor` value is handled separately later
        for metakey, metaval in run['meta'].items():
            if metakey != 'IntendedFor':
                metaval = datasource.dynamicvalue(metaval,
                                                  cleanup=False,
                                                  runtime=True)
                try:
                    metaval = ast.literal_eval(str(metaval))
                except (ValueError, SyntaxError):
                    pass
                LOGGER.info(f"Adding '{metakey}: {metaval}' to: {jsonfile}")
            if not metaval:
                metaval = None
            jsondata[metakey] = metaval

        # Remove unused (but added from the template) B0FieldIdentifiers/Sources
        if not jsondata.get('B0FieldSource'):
            jsondata.pop('B0FieldSource', None)
        if not jsondata.get('B0FieldIdentifier'):
            jsondata.pop('B0FieldIdentifier', None)

        # Save the meta-data to the json sidecar-file
        with jsonfile.open('w') as json_fid:
            json.dump(jsondata, json_fid, indent=4)

        # Add an entry to the scans_table (we typically don't have useful data to put there)
        acq_time = dateutil.parser.parse(
            f"1925-01-01T{jsondata.get('AcquisitionTime', '')}")
        scans_table.loc[bidsfile.relative_to(bidsses).as_posix(),
                        'acq_time'] = acq_time.isoformat()

    # Write the scans_table to disk
    LOGGER.info(f"Writing data to: {scans_tsv}")
    scans_table.replace('', 'n/a').to_csv(scans_tsv,
                                          sep='\t',
                                          encoding='utf-8',
                                          na_rep='n/a')

    # Add an (empty) entry to the participants_table (we don't have useful data to put there)
    participants_tsv = bidsfolder / 'participants.tsv'
    if participants_tsv.is_file():
        participants_table = pd.read_csv(participants_tsv, sep='\t', dtype=str)
        participants_table.set_index(['participant_id'],
                                     verify_integrity=True,
                                     inplace=True)
    else:
        participants_table = pd.DataFrame()
        participants_table.index.name = 'participant_id'
    if subid in participants_table.index and 'session_id' in participants_table.keys(
    ) and participants_table.loc[subid, 'session_id']:
        return  # Only take data from the first session -> BIDS specification
    participants_table.loc[subid, 'session_id'] = sesid if sesid else None

    # Write the collected data to the participants tsv-file
    LOGGER.info(f"Writing {subid} subject data to: {participants_tsv}")
    participants_table.replace('', 'n/a').to_csv(participants_tsv,
                                                 sep='\t',
                                                 encoding='utf-8',
                                                 na_rep='n/a')
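
# Illustrative usage sketch (hypothetical paths): convert a single session with this
# nibabel2bids plugin after the bidsmapper has produced the study bidsmap.
#
# bidsmap, _ = bids.load_bidsmap(Path('bidsmap.yaml'), Path('/project/bids/code/bidscoin'))
# bidscoiner_plugin(session=Path('/project/raw/sub-01/ses-01'),
#                   bidsmap=bidsmap,
#                   bidsses=Path('/project/bids/sub-01/ses-01'))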