def bidscoiner_plugin(session: Path, bidsmap: dict, bidsses: Path) -> None:
    """
    The bidscoiner plugin that adds the PET meta data from the Excel file to the json sidecar-file

    :param session:     The full-path name of the subject/session source folder
    :param bidsmap:     The full mapping heuristics from the bidsmap YAML-file
    :param bidsses:     The full-path name of the BIDS output `ses-` folder
    :return:            Nothing
    """

    # Get started and see what data format we have
    plugin = {'petxls2bids': bidsmap['Options']['plugins']['petxls2bids']}
    datasource = bids.get_datasource(session, plugin)
    dataformat = datasource.dataformat
    if not dataformat:
        LOGGER.info(f"No {__name__} sourcedata found in: {session}")
        return

    n = 0
    for file in sorted(session.iterdir()):
        if is_sourcefile(file):

            # Check if there is only one Excel file and one sidecar file (as expected in PET)
            n += 1
            if n > 1:
                LOGGER.error(f"Found ambiguous PET meta data file: {file}")
                return

            # Load the Excel data. TODO: Discuss this with Anthony and Cyril
            metadata = pd.read_excel(file)

            # Load the json sidecar data (there should be only one)
            jsonfile = sorted((bidsses / 'pet').rglob('*.json'))
            if len(jsonfile) != 1:
                LOGGER.error(f"Expected exactly one PET sidecar file in {bidsses/'pet'}, but found: {jsonfile}")
                return
            with jsonfile[0].open('r') as json_fid:
                jsondata = json.load(json_fid)

            # Add the meta-data. TODO: implement this once we know how `metadata` is organised
            for key in metadata:
                jsondata[key] = metadata[key]

            # Save the meta-data to the json sidecar file
            with jsonfile[0].open('w') as json_fid:
                json.dump(jsondata, json_fid, indent=4)
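
# A minimal sketch (not part of the plugin above) of how the Excel metadata could be flattened into
# json-serialisable key/value pairs, assuming -- hypothetically -- a two-column key/value sheet
# layout. The actual organisation of the PET Excel file is still a TODO in the plugin above:
def _xls2dict(xlsfile: Path) -> dict:
    keyvals = pd.read_excel(xlsfile, header=None, names=['key', 'value'])       # Assumed layout
    return {str(key): (val if pd.notna(val) else None)
            for key, val in zip(keyvals['key'], keyvals['value'])}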
Example #2
def scanpersonals(bidsmap: dict, session: Path, personals: dict) -> bool:
    """
    Extracts personal data (e.g. Age, Sex) from the source header of the (first) source file in the session

    :param bidsmap:     The study bidsmap with the mapping heuristics
    :param session:     The full-path name of the subject/session source file/folder
    :param personals:   The dictionary with the personal information
    :return:            True if successful
    """

    # Get valid BIDS subject/session identifiers from the (first) DICOM- or PAR/XML source file
    datasource = bids.get_datasource(session, bidsmap['Options']['plugins'])
    dataformat = datasource.dataformat
    if not datasource.dataformat:
        LOGGER.info(f"No supported datasources found in '{session}'")
        return False

    # Collect personal data from a source header (PAR/XML does not contain personal info)
    if dataformat in ('DICOM', 'Twix'):
        personals['sex']    = datasource.attributes('PatientSex')
        personals['size']   = datasource.attributes('PatientSize')
        personals['weight'] = datasource.attributes('PatientWeight')
        age = datasource.attributes('PatientAge')                   # A string of characters with one of the following formats: nnnD, nnnW, nnnM, nnnY
    elif dataformat=='Pfile':
        sex = datasource.attributes('rhe_patsex')
        if sex=='0':   personals['sex'] = 'O'
        elif sex=='1': personals['sex'] = 'M'
        elif sex=='2': personals['sex'] = 'F'
        age = dateutil.parser.parse(datasource.attributes('rhr_rh_scan_date')) - dateutil.parser.parse(datasource.attributes('rhe_dateofbirth'))
        age = str(age.days) + 'D'
    else:
        return False

    if age.endswith('D'):   age = float(age.rstrip('D')) / 365.2524
    elif age.endswith('W'): age = float(age.rstrip('W')) / 52.1775
    elif age.endswith('M'): age = float(age.rstrip('M')) / 12
    elif age.endswith('Y'): age = float(age.rstrip('Y'))
    if age:
        if bidsmap['Options']['plugins']['dcm2niix2bids'].get('anon', 'y') in ('y','yes'):
            age = int(float(age))
        personals['age'] = str(age)

    return True
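
# Standalone illustration (a sketch, not an existing bidscoin helper) of the PatientAge conversion
# used above: a DICOM age string of the form nnnD/nnnW/nnnM/nnnY is converted to (fractional) years.
def _age_in_years(age: str) -> float:
    if age.endswith('D'):   return float(age.rstrip('D')) / 365.2524
    elif age.endswith('W'): return float(age.rstrip('W')) / 52.1775
    elif age.endswith('M'): return float(age.rstrip('M')) / 12
    elif age.endswith('Y'): return float(age.rstrip('Y'))
    return 0.0

# E.g. _age_in_years('030Y') -> 30.0 and _age_in_years('006M') -> 0.5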
Example #3
def bidsmapper_plugin(session: Path, bidsmap_new: dict, bidsmap_old: dict,
                      template: dict, store: dict) -> None:
    """
    All the logic to map the DICOM/PAR source fields onto bids labels goes into this function

    :param session:     The full-path name of the subject/session raw data source folder
    :param bidsmap_new: The new study bidsmap that we are building
    :param bidsmap_old: The previous study bidsmap that has precedence over the template bidsmap
    :param template:    The template bidsmap with the default heuristics
    :param store:       The paths of the source- and target-folder
    :return:            Nothing
    """

    # Get started
    plugin = {
        'dcm2niix2bids': bidsmap_new['Options']['plugins']['dcm2niix2bids']
    }
    datasource = bids.get_datasource(session, plugin)
    dataformat = datasource.dataformat
    if not dataformat:
        return

    # Collect the different DICOM/PAR source files for all runs in the session
    sourcefiles = []
    if dataformat == 'DICOM':
        for sourcedir in bidscoin.lsdirs(session):
            for n in range(
                    1
            ):  # Option: Use range(2) to scan two files and catch e.g. magnitude1/2 fieldmap files that are stored in one Series folder (but bidscoiner sees only the first file anyhow and it makes bidsmapper 2x slower :-()
                sourcefile = bids.get_dicomfile(sourcedir, n)
                if sourcefile.name:
                    sourcefiles.append(sourcefile)
    elif dataformat == 'PAR':
        sourcefiles = bids.get_parfiles(session)
    else:
        LOGGER.exception(f"Unsupported dataformat '{dataformat}'")

    # Update the bidsmap with the info from the source files
    for sourcefile in sourcefiles:

        # Input checks
        if not sourcefile.name or (not template[dataformat]
                                   and not bidsmap_old[dataformat]):
            LOGGER.error(
                f"No {dataformat} source information found in the bidsmap and template for: {sourcefile}"
            )
            return

        # See if we can find a matching run in the old bidsmap
        datasource = bids.DataSource(sourcefile, plugin, dataformat)
        run, match = bids.get_matching_run(datasource, bidsmap_old)

        # If not, see if we can find a matching run in the template
        if not match:
            run, _ = bids.get_matching_run(datasource, template)

        # See if we have collected the run somewhere in our new bidsmap
        if not bids.exist_run(bidsmap_new, '', run):

            # Communicate with the user if the run was not present in bidsmap_old or in template, i.e. that we found a new sample
            if not match:
                LOGGER.info(
                    f"Discovered '{datasource.datatype}' {dataformat} sample: {sourcefile}"
                )

            # Now work from the provenance store
            if store:
                targetfile = store['target'] / sourcefile.relative_to(
                    store['source'])
                targetfile.parent.mkdir(parents=True, exist_ok=True)
                run['provenance'] = str(shutil.copy2(sourcefile, targetfile))

            # Copy the filled-in run over to the new bidsmap
            bids.append_run(bidsmap_new, run)

        else:
            # Communicate with the user if the run was already present in bidsmap_old or in template
            LOGGER.debug(
                f"Known '{datasource.datatype}' {dataformat} sample: {sourcefile}"
            )
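
# A small self-contained illustration (hypothetical paths, not bidscoin code) of how the provenance
# store above maps a discovered source file into the target folder while preserving its relative layout:
def _provenance_target(sourcefile: Path, store: dict) -> Path:
    return store['target'] / sourcefile.relative_to(store['source'])

# E.g. _provenance_target(Path('/raw/sub-001/007-T1w/IM_0001.dcm'),
#                         {'source': Path('/raw'), 'target': Path('/bids/code/bidscoin/provenance')})
# -> Path('/bids/code/bidscoin/provenance/sub-001/007-T1w/IM_0001.dcm')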
Example #4
def bidscoiner_plugin(session: Path, bidsmap: dict, bidsses: Path) -> None:
    """
    The bidscoiner plugin to convert the session DICOM and PAR/REC source-files into BIDS-valid nifti-files in the
    corresponding bids session-folder and extract personals (e.g. Age, Sex) from the source header

    :param session:     The full-path name of the subject/session source folder
    :param bidsmap:     The full mapping heuristics from the bidsmap YAML-file
    :param bidsses:     The full-path name of the BIDS output `sub-/ses-` folder
    :return:            Nothing
    """

    # Get the subject identifiers and the BIDS root folder from the bidsses folder
    if bidsses.name.startswith('ses-'):
        bidsfolder = bidsses.parent.parent
        subid = bidsses.parent.name
        sesid = bidsses.name
    else:
        bidsfolder = bidsses.parent
        subid = bidsses.name
        sesid = ''

    # Get started and see what dataformat we have
    options = bidsmap['Options']['plugins']['dcm2niix2bids']
    datasource = bids.get_datasource(session, {'dcm2niix2bids': options})
    dataformat = datasource.dataformat
    if not dataformat:
        LOGGER.info(f"No {__name__} sourcedata found in: {session}")
        return

    # Make a list of all the data sources / runs
    manufacturer = 'UNKNOWN'
    sources = []
    if dataformat == 'DICOM':
        sources = bidscoin.lsdirs(session)
        manufacturer = datasource.attributes('Manufacturer')
    elif dataformat == 'PAR':
        sources = bids.get_parfiles(session)
        manufacturer = 'Philips Medical Systems'
    else:
        LOGGER.exception(f"Unsupported dataformat '{dataformat}'")

    # Read or create a scans_table and tsv-file
    scans_tsv = bidsses / f"{subid}{bids.add_prefix('_',sesid)}_scans.tsv"
    if scans_tsv.is_file():
        scans_table = pd.read_csv(scans_tsv, sep='\t', index_col='filename')
    else:
        scans_table = pd.DataFrame(columns=['acq_time'], dtype='str')
        scans_table.index.name = 'filename'
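
    # For reference, the resulting scans.tsv is a tab-separated file with the output filenames as
    # index and their acquisition times as values, e.g. (hypothetical, anonymised entries):
    #   filename                            acq_time
    #   anat/sub-001_T1w.nii.gz             1925-01-01T09:30:12
    #   func/sub-001_task-rest_bold.nii.gz  1925-01-01T09:45:01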

    # Process all the source files or run subfolders
    sourcefile = Path()
    for source in sources:

        # Get a sourcefile
        if dataformat == 'DICOM':
            sourcefile = bids.get_dicomfile(source)
        elif dataformat == 'PAR':
            sourcefile = source
        if not sourcefile.name:
            continue

        # Get a matching run from the bidsmap
        datasource = bids.DataSource(sourcefile, {'dcm2niix2bids': options},
                                     dataformat)
        run, match = bids.get_matching_run(datasource, bidsmap, runtime=True)

        # Check if we should ignore this run
        if datasource.datatype in bidsmap['Options']['bidscoin'][
                'ignoretypes']:
            LOGGER.info(f"Leaving out: {source}")
            continue

        # Check if we already know this run
        if not match:
            LOGGER.error(
                f"Skipping unknown '{datasource.datatype}' run: {sourcefile}\n-> Re-run the bidsmapper and delete {bidsses} to solve this warning"
            )
            continue

        LOGGER.info(f"Processing: {source}")

        # Create the BIDS session/datatype output folder
        if run['bids']['suffix'] in bids.get_derivatives(datasource.datatype):
            outfolder = bidsfolder / 'derivatives' / manufacturer.replace(
                ' ', '') / subid / sesid / datasource.datatype
        else:
            outfolder = bidsses / datasource.datatype
        outfolder.mkdir(parents=True, exist_ok=True)

        # Compose the BIDS filename using the matched run
        bidsname = bids.get_bidsname(subid, sesid, run, runtime=True)
        runindex = run['bids'].get('run', '')
        if runindex.startswith('<<') and runindex.endswith('>>'):
            bidsname = bids.increment_runindex(outfolder, bidsname)
        jsonfiles = [
            (outfolder / bidsname).with_suffix('.json')
        ]  # List -> Collect the associated json-files (for updating them later) -- possibly > 1

        # Check if file already exists (-> e.g. when a static runindex is used)
        if (outfolder / bidsname).with_suffix('.json').is_file():
            LOGGER.warning(
                f"{outfolder/bidsname}.* already exists and will be deleted -- check your results carefully!"
            )
            for ext in ('.nii.gz', '.nii', '.json', '.tsv', '.tsv.gz', '.bval',
                        '.bvec'):
                (outfolder / bidsname).with_suffix(ext).unlink(missing_ok=True)

        # Convert physiological log files (dcm2niix can't handle these)
        if run['bids']['suffix'] == 'physio':
            if bids.get_dicomfile(
                    source, 2).name:  # TODO: issue warning or support PAR
                LOGGER.warning(
                    f"Found > 1 DICOM file in {source}, using: {sourcefile}")
            physiodata = physio.readphysio(sourcefile)
            physio.physio2tsv(physiodata, outfolder / bidsname)

        # Convert the source-files in the run folder to nifti's in the BIDS-folder
        else:
            command = '{command} {args} -f "{filename}" -o "{outfolder}" "{source}"'.format(
                command=options['command'],
                args=options.get('args', ''),
                filename=bidsname,
                outfolder=outfolder,
                source=source)
            if not bidscoin.run_command(command):
                if not list(outfolder.glob(f"{bidsname}.nii*")): continue
            if list(outfolder.glob(f"{bidsname}a.nii*")):
                LOGGER.warning(
                    f"Unexpected variants of {outfolder/bidsname}* were produced by dcm2niix. Possibly this can be remedied by using the dcm2niix -i option (to ignore derived, localizer and 2D images)"
                )

            # Replace uncropped output image with the cropped one
            if '-x y' in options.get('args', ''):
                for dcm2niixfile in sorted(
                        outfolder.glob(bidsname +
                                       '*_Crop_*')):  # e.g. *_Crop_1.nii.gz
                    ext = ''.join(dcm2niixfile.suffixes)
                    newbidsfile = str(dcm2niixfile).rsplit(ext, 1)[0].rsplit(
                        '_Crop_', 1)[0] + ext
                    LOGGER.info(
                        f"Found dcm2niix _Crop_ postfix, replacing original file\n{dcm2niixfile} ->\n{newbidsfile}"
                    )
                    dcm2niixfile.replace(newbidsfile)

            # Rename all files that got additional postfixes from dcm2niix. See: https://github.com/rordenlab/dcm2niix/blob/master/FILENAMING.md
            dcm2niixpostfixes = ('_c', '_i', '_Eq', '_real', '_imaginary',
                                 '_MoCo', '_t', '_Tilt', '_e', '_ph', '_ADC',
                                 '_fieldmaphz')
            dcm2niixfiles = sorted(
                set([
                    dcm2niixfile for dcm2niixpostfix in dcm2niixpostfixes
                    for dcm2niixfile in outfolder.glob(
                        f"{bidsname}*{dcm2niixpostfix}*.nii*")
                ]))
            if not jsonfiles[0].is_file(
            ) and dcm2niixfiles:  # Possibly renamed by dcm2niix, e.g. with multi-echo data (but not always for the first echo)
                jsonfiles.pop(0)
            for dcm2niixfile in dcm2niixfiles:
                ext = ''.join(dcm2niixfile.suffixes)
                postfixes = str(dcm2niixfile).split(bidsname)[1].rsplit(
                    ext)[0].split('_')[1:]
                newbidsname = dcm2niixfile.name  # Strip the additional postfixes and assign them to bids entities in the for-loop below
                for postfix in postfixes:  # dcm2niix postfixes _c%d, _e%d and _ph (and any combination of these in that order) are for multi-coil data, multi-echo data and phase data

                    # Patch the echo entity in the newbidsname with the dcm2niix echo info                      # NB: We can't rely on the bids-entity info here because manufacturers can e.g. put multiple echos in one series / run-folder
                    if 'echo' in run['bids'] and postfix.startswith('e'):
                        echonr = f"_{postfix}".replace('_e',
                                                       '')  # E.g. postfix='e1'
                        if not echonr:
                            echonr = '1'
                        if echonr.isnumeric():
                            newbidsname = bids.insert_bidskeyval(
                                newbidsname, 'echo', echonr.lstrip('0')
                            )  # In contrast to other labels, run and echo labels MUST be integers. Those labels MAY include zero padding, but this is NOT RECOMMENDED to maintain their uniqueness
                        else:
                            LOGGER.error(
                                f"Unexpected postix '{postfix}' found in {dcm2niixfile}"
                            )
                            newbidsname = bids.get_bidsvalue(
                                newbidsname, 'dummy', postfix
                            )  # Append the unknown postfix to the acq-label

                    # Patch the phase entity in the newbidsname with the dcm2niix mag/phase info
                    elif 'part' in run['bids'] and postfix in (
                            'ph', 'real', 'imaginary'
                    ):  # e.g. part: ['', 'mag', 'phase', 'real', 'imag', 0]
                        if postfix == 'ph':
                            newbidsname = bids.insert_bidskeyval(
                                newbidsname, 'part', 'phase')
                        if postfix == 'real':
                            newbidsname = bids.insert_bidskeyval(
                                newbidsname, 'part', 'real')
                        if postfix == 'imaginary':
                            newbidsname = bids.insert_bidskeyval(
                                newbidsname, 'part', 'imag')

                    # Patch fieldmap images (NB: datatype=='fmap' is too broad, see the fmap.yaml file)
                    elif run['bids']['suffix'] in bids.bidsdatatypes['fmap'][0][
                            'suffixes']:  # i.e. in ('magnitude','magnitude1','magnitude2','phase1','phase2','phasediff','fieldmap'). TODO: Make this robust for future BIDS versions
                        if len(dcm2niixfiles) not in (
                                1, 2, 3, 4
                        ):  # Phase / echo data may be stored in the same data source / run folder
                            LOGGER.debug(
                                f"Unknown fieldmap {outfolder/bidsname} for '{postfix}'"
                            )
                        newbidsname = newbidsname.replace(
                            '_magnitude1a', '_magnitude2'
                        )  # First catch this potential weird / rare case
                        newbidsname = newbidsname.replace(
                            '_magnitude1_pha', '_phase2'
                        )  # First catch this potential weird / rare case
                        newbidsname = newbidsname.replace(
                            '_magnitude1_e1', '_magnitude1'
                        )  # Case 2 = Two phase and magnitude images
                        newbidsname = newbidsname.replace(
                            '_magnitude1_e2', '_magnitude2'
                        )  # Case 2: This can happen when the e2 image is stored in the same directory as the e1 image, but with the e2 listed first
                        newbidsname = newbidsname.replace(
                            '_magnitude2_e1', '_magnitude1'
                        )  # Case 2: This can happen when the e2 image is stored in the same directory as the e1 image, but with the e2 listed first
                        newbidsname = newbidsname.replace(
                            '_magnitude2_e2', '_magnitude2')  # Case 2
                        if len(dcm2niixfiles) in (
                                2, 3
                        ):  # Case 1 = One or two magnitude + one phasediff image
                            newbidsname = newbidsname.replace(
                                '_magnitude1_ph', '_phasediff')
                            newbidsname = newbidsname.replace(
                                '_magnitude2_ph', '_phasediff')
                        newbidsname = newbidsname.replace(
                            '_phasediff_e1', '_phasediff')  # Case 1
                        newbidsname = newbidsname.replace(
                            '_phasediff_e2', '_phasediff')  # Case 1
                        newbidsname = newbidsname.replace(
                            '_phasediff_ph', '_phasediff')  # Case 1
                        newbidsname = newbidsname.replace(
                            '_magnitude1_ph', '_phase1'
                        )  # Case 2: One or two magnitude and phase images in one folder / datasource
                        newbidsname = newbidsname.replace(
                            '_magnitude2_ph', '_phase2'
                        )  # Case 2: Two magnitude + two phase images in one folder / datasource
                        newbidsname = newbidsname.replace(
                            '_phase1_e1', '_phase1')  # Case 2
                        newbidsname = newbidsname.replace(
                            '_phase1_e2', '_phase2'
                        )  # Case 2: This can happen when the e2 image is stored in the same directory as the e1 image, but with the e2 listed first
                        newbidsname = newbidsname.replace(
                            '_phase2_e1', '_phase1'
                        )  # Case 2: This can happen when the e2 image is stored in the same directory as the e1 image, but with the e2 listed first
                        newbidsname = newbidsname.replace(
                            '_phase2_e2', '_phase2')  # Case 2
                        newbidsname = newbidsname.replace(
                            '_phase1_ph', '_phase1'
                        )  # Case 2: One or two magnitude and phase images in one folder / datasource
                        newbidsname = newbidsname.replace(
                            '_phase2_ph', '_phase2'
                        )  # Case 2: Two magnitude + two phase images in one folder / datasource
                        newbidsname = newbidsname.replace(
                            '_magnitude_e1', '_magnitude'
                        )  # Case 3 = One magnitude + one fieldmap image
                        if len(dcm2niixfiles) == 2:
                            newbidsname = newbidsname.replace(
                                '_fieldmap_e1', '_magnitude'
                            )  # Case 3: One magnitude + one fieldmap image in one folder / datasource
                        newbidsname = newbidsname.replace(
                            '_fieldmap_e1', '_fieldmap')  # Case 3
                        newbidsname = newbidsname.replace(
                            '_magnitude_ph', '_fieldmap'
                        )  # Case 3: One magnitude + one fieldmap image in one folder / datasource
                        newbidsname = newbidsname.replace(
                            '_fieldmap_ph', '_fieldmap')  # Case 3

                    # Append the dcm2niix info to acq-label, may need to be improved / elaborated for future BIDS standards, supporting multi-coil data
                    else:
                        newbidsname = bids.get_bidsvalue(
                            newbidsname, 'dummy', postfix)

                    # Remove the added postfix from the new bidsname
                    newbidsname = newbidsname.replace(f"_{postfix}_",
                                                      '_')  # If it is not last
                    newbidsname = newbidsname.replace(f"_{postfix}.",
                                                      '.')  # If it is last

                    # The ADC images are not BIDS compliant
                    if postfix == 'ADC':
                        LOGGER.warning(
                            f"The {newbidsname} image is most likely not BIDS-compliant -- you can probably delete it safely and update the scants.tsv file"
                        )

                # Save the nifti file with a new name
                if runindex.startswith('<<') and runindex.endswith('>>'):
                    newbidsname = bids.increment_runindex(
                        outfolder, newbidsname, ''
                    )  # Update the runindex now that the acq-label has changed
                newbidsfile = outfolder / newbidsname
                LOGGER.info(
                    f"Found dcm2niix {postfixes} postfixes, renaming\n{dcm2niixfile} ->\n{newbidsfile}"
                )
                if newbidsfile.is_file():
                    LOGGER.warning(
                        f"Overwriting existing {newbidsfile} file -- check your results carefully!"
                    )
                dcm2niixfile.replace(newbidsfile)

                # Rename all associated files (i.e. the json-, bval- and bvec-files)
                oldjsonfile = dcm2niixfile.with_suffix('').with_suffix('.json')
                newjsonfile = newbidsfile.with_suffix('').with_suffix('.json')
                if not oldjsonfile.is_file():
                    LOGGER.warning(
                        f"Unexpected file conversion result: {oldjsonfile} not found"
                    )
                else:
                    if oldjsonfile in jsonfiles:
                        jsonfiles.remove(oldjsonfile)
                    if newjsonfile not in jsonfiles:
                        jsonfiles.append(newjsonfile)
                for oldfile in outfolder.glob(
                        dcm2niixfile.with_suffix('').stem + '.*'):
                    oldfile.replace(
                        newjsonfile.with_suffix(''.join(oldfile.suffixes)))

        # Copy over the source meta-data
        metadata = bids.copymetadata(sourcefile, outfolder / bidsname,
                                     options.get('meta', []))

        # Loop over and adapt all the newly produced json sidecar-files and write to the scans.tsv file (NB: assumes every nifti-file comes with a json-file)
        for jsonfile in sorted(set(jsonfiles)):

            # Load the json meta-data
            with jsonfile.open('r') as json_fid:
                jsondata = json.load(json_fid)

            # Add all the source meta data to the meta-data
            for metakey, metaval in metadata.items():
                if metakey in jsondata and jsondata.get(metakey) != metaval:
                    LOGGER.warning(
                        f"Replacing {metakey} values in {jsonfile}: {jsondata[metakey]} -> {metaval}"
                    )
                jsondata[metakey] = metaval

            # Add all the run meta data to the meta-data. NB: the dynamic `IntendedFor` value is handled separately later
            for metakey, metaval in run['meta'].items():
                if metakey != 'IntendedFor':
                    metaval = datasource.dynamicvalue(metaval,
                                                      cleanup=False,
                                                      runtime=True)
                    try:
                        metaval = ast.literal_eval(str(metaval))
                    except (ValueError, SyntaxError):
                        pass
                    LOGGER.info(
                        f"Adding '{metakey}: {metaval}' to: {jsonfile}")
                if not metaval:
                    metaval = None
                jsondata[metakey] = metaval

            # Remove unused (but added from the template) B0FieldIdentifiers/Sources
            if not jsondata.get('B0FieldSource'):
                jsondata.pop('B0FieldSource', None)
            if not jsondata.get('B0FieldIdentifier'):
                jsondata.pop('B0FieldIdentifier', None)

            # Save the meta-data to the json sidecar-file
            with jsonfile.open('w') as json_fid:
                json.dump(jsondata, json_fid, indent=4)

            # Parse the acquisition time from the source header or else from the json file (NB: assuming the source file represents the first acquisition)
            outputfile = [
                file for file in jsonfile.parent.glob(jsonfile.stem + '.*')
                if file.suffix in ('.nii', '.gz')
            ]  # Find the corresponding nifti/tsv.gz file (there should be only one, let's not make assumptions about the .gz extension)
            if not outputfile:
                LOGGER.exception(
                    f"No data-file found with {jsonfile} when updating {scans_tsv}"
                )
            elif datasource.datatype not in bidsmap['Options']['bidscoin'][
                    'bidsignore'] and not run['bids'][
                        'suffix'] in bids.get_derivatives(datasource.datatype):
                acq_time = ''
                if dataformat == 'DICOM':
                    acq_time = f"{datasource.attributes('AcquisitionDate')}T{datasource.attributes('AcquisitionTime')}"
                elif dataformat == 'PAR':
                    acq_time = datasource.attributes('exam_date')
                if not acq_time or acq_time == 'T':
                    acq_time = f"1925-01-01T{jsondata.get('AcquisitionTime','')}"
                try:
                    acq_time = dateutil.parser.parse(acq_time)
                    if options.get('anon', 'y') in ('y', 'yes'):
                        acq_time = acq_time.replace(
                            year=1925, month=1, day=1
                        )  # Privacy protection (see BIDS specification)
                    acq_time = acq_time.isoformat()
                except Exception as jsonerror:
                    LOGGER.warning(
                        f"Could not parse the acquisition time from: {sourcefile}\n{jsonerror}"
                    )
                    acq_time = 'n/a'
                scanpath = outputfile[0].relative_to(bidsses)
                scans_table.loc[scanpath.as_posix(), 'acq_time'] = acq_time

    # Write the scans_table to disk
    LOGGER.info(f"Writing acquisition time data to: {scans_tsv}")
    scans_table.sort_values(by=['acq_time', 'filename'], inplace=True)
    scans_table.replace('', 'n/a').to_csv(scans_tsv,
                                          sep='\t',
                                          encoding='utf-8',
                                          na_rep='n/a')

    # Collect personal data from a source header (PAR/XML does not contain personal info)
    personals = {}
    if sesid and 'session_id' not in personals:
        personals['session_id'] = sesid
    personals['age'] = ''
    if dataformat == 'DICOM':
        age = datasource.attributes(
            'PatientAge'
        )  # A string of characters with one of the following formats: nnnD, nnnW, nnnM, nnnY
        if age.endswith('D'): age = float(age.rstrip('D')) / 365.2524
        elif age.endswith('W'): age = float(age.rstrip('W')) / 52.1775
        elif age.endswith('M'): age = float(age.rstrip('M')) / 12
        elif age.endswith('Y'): age = float(age.rstrip('Y'))
        if age:
            if options.get('anon', 'y') in ('y', 'yes'):
                age = int(float(age))
            personals['age'] = str(age)
        personals['sex'] = datasource.attributes('PatientSex')
        personals['size'] = datasource.attributes('PatientSize')
        personals['weight'] = datasource.attributes('PatientWeight')

    # Store the collected personals in the participants_table
    participants_tsv = bidsfolder / 'participants.tsv'
    if participants_tsv.is_file():
        participants_table = pd.read_csv(participants_tsv, sep='\t', dtype=str)
        participants_table.set_index(['participant_id'],
                                     verify_integrity=True,
                                     inplace=True)
    else:
        participants_table = pd.DataFrame()
        participants_table.index.name = 'participant_id'
    if subid in participants_table.index and 'session_id' in participants_table.keys(
    ) and participants_table.loc[subid, 'session_id']:
        return  # Only take data from the first session -> BIDS specification
    for key in personals:  # TODO: Check that only values that are consistent over sessions go in the participants.tsv file, otherwise put them in a sessions.tsv file
        if key not in participants_table or participants_table[key].isnull(
        ).get(subid, True) or participants_table[key].get(subid) == 'n/a':
            participants_table.loc[subid, key] = personals[key]

    # Write the collected data to the participants tsv-file
    LOGGER.info(f"Writing {subid} subject data to: {participants_tsv}")
    participants_table.replace('', 'n/a').to_csv(participants_tsv,
                                                 sep='\t',
                                                 encoding='utf-8',
                                                 na_rep='n/a')
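
# Standalone illustration (hypothetical filenames) of the dcm2niix postfix parsing used above: the
# trailing postfixes are recovered by stripping the composed bidsname and the file extension from
# the dcm2niix output name and splitting the remainder on '_':
_bidsname     = 'sub-001_task-rest_bold'
_dcm2niixfile = Path('sub-001_task-rest_bold_e2_ph.nii.gz')
_ext          = ''.join(_dcm2niixfile.suffixes)                                        # '.nii.gz'
_postfixes    = str(_dcm2niixfile).split(_bidsname)[1].rsplit(_ext)[0].split('_')[1:]  # ['e2', 'ph'] -> echo 2, phase data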
Example #5
def bidscoiner(rawfolder: str,
               bidsfolder: str,
               subjects: list = (),
               force: bool = False,
               participants: bool = False,
               bidsmapfile: str = 'bidsmap.yaml') -> None:
    """
    Main function that processes all the subjects and sessions in the sourcefolder and uses the
    bidsmap.yaml file in bidsfolder/code/bidscoin to cast the data into the BIDS folder.

    :param rawfolder:       The root folder-name of the sub/ses/data/file tree containing the source data files
    :param bidsfolder:      The name of the BIDS root folder
    :param subjects:        List of selected subjects / participants (i.e. sub-# names / folders) to be processed (the sub- prefix can be removed). Otherwise all subjects in the sourcefolder will be selected
    :param force:           If True, subjects will be processed, regardless of existing folders in the bidsfolder. Otherwise existing folders will be skipped
    :param participants:    If True, subjects in participants.tsv will not be processed (this could be used e.g. to protect these subjects from being reprocessed), also when force=True
    :param bidsmapfile:     The name of the bidsmap YAML-file. If the bidsmap pathname is relative (i.e. no "/" in the name) then it is assumed to be located in bidsfolder/code/bidscoin
    :return:                Nothing
    """

    # Input checking & defaults
    rawfolder = Path(rawfolder).resolve()
    bidsfolder = Path(bidsfolder).resolve()
    bidsmapfile = Path(bidsmapfile)

    # Start logging
    bidscoin.setup_logging(bidsfolder / 'code' / 'bidscoin' / 'bidscoiner.log')
    LOGGER.info('')
    LOGGER.info(
        f"-------------- START BIDScoiner {localversion}: BIDS {bidscoin.bidsversion()} ------------"
    )
    LOGGER.info(
        f">>> bidscoiner sourcefolder={rawfolder} bidsfolder={bidsfolder} subjects={subjects} force={force} participants={participants} bidsmap={bidsmapfile}"
    )

    # Create a code/bidscoin subfolder
    (bidsfolder / 'code' / 'bidscoin').mkdir(parents=True, exist_ok=True)

    # Create a dataset description file if it does not exist
    dataset_file = bidsfolder / 'dataset_description.json'
    generatedby = [{
        "Name": "BIDScoin",
        "Version": localversion,
        "CodeURL": "https://github.com/Donders-Institute/bidscoin"
    }]
    if not dataset_file.is_file():
        LOGGER.info(f"Creating dataset description file: {dataset_file}")
        dataset_description = {
            "Name":
            "REQUIRED. Name of the dataset",
            "GeneratedBy":
            generatedby,
            "BIDSVersion":
            str(bidscoin.bidsversion()),
            "DatasetType":
            "raw",
            "License":
            "RECOMMENDED. The license for the dataset. The use of license name abbreviations is RECOMMENDED for specifying a license. The corresponding full license text MAY be specified in an additional LICENSE file",
            "Authors": [
                "OPTIONAL. List of individuals who contributed to the creation/curation of the dataset"
            ],
            "Acknowledgements":
            "OPTIONAL. Text acknowledging contributions of individuals or institutions beyond those listed in Authors or Funding",
            "HowToAcknowledge":
            "OPTIONAL. Instructions how researchers using this dataset should acknowledge the original authors. This field can also be used to define a publication that should be cited in publications that use the dataset",
            "Funding":
            ["OPTIONAL. List of sources of funding (grant numbers)"],
            "EthicsApprovals": [
                "OPTIONAL. List of ethics committee approvals of the research protocols and/or protocol identifiers"
            ],
            "ReferencesAndLinks": [
                "OPTIONAL. List of references to publication that contain information on the dataset, or links",
                "https://github.com/Donders-Institute/bidscoin"
            ],
            "DatasetDOI":
            "OPTIONAL. The Document Object Identifier of the dataset (not the corresponding paper)"
        }
    else:
        with dataset_file.open('r') as fid:
            dataset_description = json.load(fid)
        if 'BIDScoin' not in [
                generatedby_['Name']
                for generatedby_ in dataset_description.get('GeneratedBy', [])
        ]:
            LOGGER.info(f"Adding {generatedby} to {dataset_file}")
            dataset_description['GeneratedBy'] = dataset_description.get(
                'GeneratedBy', []) + generatedby
    with dataset_file.open('w') as fid:
        json.dump(dataset_description, fid, indent=4)

    # Create a README file if it does not exist
    readme_file = bidsfolder / 'README'
    if not readme_file.is_file():
        LOGGER.info(f"Creating README file: {readme_file}")
        readme_file.write_text(
            f"A free form text ( README ) describing the dataset in more details that SHOULD be provided\n\n"
            f"The raw BIDS data was created using BIDScoin {localversion}\n"
            f"All provenance information and settings can be found in ./code/bidscoin\n"
            f"For more information see: https://github.com/Donders-Institute/bidscoin\n"
        )

    # Get the bidsmap heuristics from the bidsmap YAML-file
    bidsmap, _ = bids.load_bidsmap(bidsmapfile,
                                   bidsfolder / 'code' / 'bidscoin')
    if not bidsmap:
        LOGGER.error(
            f"No bidsmap file found in {bidsfolder}. Please run the bidsmapper first and/or use the correct bidsfolder"
        )
        return
    dataformats = [
        dataformat for dataformat in bidsmap
        if dataformat and dataformat not in ('Options', 'PlugIns')
    ]  # Handle legacy bidsmaps (-> 'PlugIns')

    # Load the data conversion plugins
    plugins = [
        bidscoin.import_plugin(plugin, ('bidscoiner_plugin', ))
        for plugin, options in bidsmap['Options']['plugins'].items()
    ]
    plugins = [plugin for plugin in plugins
               if plugin]  # Filter the empty items from the list
    if not plugins:
        LOGGER.warning(
            f"The plugins listed in your bidsmap['Options'] did not have a usable `bidscoiner_plugin` function, nothing to do"
        )
        LOGGER.info('-------------- FINISHED! ------------')
        LOGGER.info('')
        return

    # Append options to the .bidsignore file
    bidsignore_items = [
        item.strip()
        for item in bidsmap['Options']['bidscoin']['bidsignore'].split(';')
    ]
    bidsignore_file = bidsfolder / '.bidsignore'
    if bidsignore_items:
        LOGGER.info(f"Writing {bidsignore_items} entries to {bidsignore_file}")
        if bidsignore_file.is_file():
            bidsignore_items += bidsignore_file.read_text().splitlines()
        with bidsignore_file.open('w') as bidsignore:
            for item in set(bidsignore_items):
                bidsignore.write(item + '\n')
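
    # For reference, the resulting .bidsignore file simply lists one pattern per line, e.g.
    # (hypothetical entries coming from a semicolon-separated option such as 'extra_data/;sub-*_ct.*'):
    #   extra_data/
    #   sub-*_ct.*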

    # Get the table & dictionary of the subjects that have been processed
    participants_tsv = bidsfolder / 'participants.tsv'
    if participants_tsv.is_file():
        participants_table = pd.read_csv(participants_tsv, sep='\t')
        participants_table.set_index(['participant_id'],
                                     verify_integrity=True,
                                     inplace=True)
    else:
        participants_table = pd.DataFrame()
        participants_table.index.name = 'participant_id'

    # Get the list of subjects
    subprefix = bidsmap['Options']['bidscoin']['subprefix'].replace('*', '')
    sesprefix = bidsmap['Options']['bidscoin']['sesprefix'].replace('*', '')
    if not subjects:
        subjects = bidscoin.lsdirs(
            rawfolder, (subprefix if subprefix != '*' else '') + '*')
        if not subjects:
            LOGGER.warning(f"No subjects found in: {rawfolder/subprefix}*")
    else:
        subjects = [
            rawfolder / (subprefix + re.sub(f"^{subprefix}", '', subject))
            for subject in subjects
        ]  # Make sure there is a sub-prefix

    # Loop over all subjects and sessions and convert them using the bidsmap entries
    with logging_redirect_tqdm():
        for n, subject in enumerate(
                tqdm(subjects, unit='subject', leave=False), 1):

            LOGGER.info(
                f"------------------- Subject {n}/{len(subjects)} -------------------"
            )
            if participants and subject.name in list(participants_table.index):
                LOGGER.info(
                    f"Skipping subject: {subject} ({n}/{len(subjects)})")
                continue
            if not subject.is_dir():
                LOGGER.warning(
                    f"The '{subject}' subject folder does not exist")
                continue

            sessions = bidscoin.lsdirs(
                subject, (sesprefix if sesprefix != '*' else '') + '*')
            if not sessions or (subject / 'DICOMDIR').is_file():
                sessions = [subject]
            for session in sessions:

                # Unpack the data in a temporary folder if it is tarballed/zipped and/or contains a DICOMDIR file
                sesfolders, unpacked = bids.unpack(session)
                for sesfolder in sesfolders:

                    # Check if we should skip the session-folder
                    datasource = bids.get_datasource(
                        sesfolder, bidsmap['Options']['plugins'])
                    if not datasource.dataformat:
                        LOGGER.info(
                            f"No coinable datasources found in '{sesfolder}'")
                        continue
                    subid = bidsmap[datasource.dataformat]['subject']
                    sesid = bidsmap[datasource.dataformat]['session']
                    subid, sesid = datasource.subid_sesid(
                        subid, sesid if sesid else '')
                    bidssession = bidsfolder / subid / sesid  # TODO: Support DICOMDIR with multiple subjects (as in PYDICOMDIR)
                    if not force and bidssession.is_dir():
                        datatypes = []
                        for dataformat in dataformats:
                            for datatype in bidscoin.lsdirs(
                                    bidssession
                            ):  # See what datatypes we already have in the bids session-folder
                                if datatype.iterdir(
                                ) and bidsmap[dataformat].get(
                                        datatype.name
                                ):  # See if we are going to add data for this datatype
                                    datatypes.append(datatype.name)
                        if datatypes:
                            LOGGER.info(
                                f"Skipping processed session: {bidssession} already has {datatypes} data (you can carefully use the -f option to overrule)"
                            )
                            continue

                    LOGGER.info(f"Coining datasources in: {sesfolder}")
                    if bidssession.is_dir():
                        LOGGER.warning(
                            f"Existing BIDS output-directory found, which may result in duplicate data (with increased run-index). Make sure {bidssession} was cleaned-up from old data before (re)running the bidscoiner"
                        )
                    bidssession.mkdir(parents=True, exist_ok=True)

                    # Run the bidscoiner plugins
                    for module in plugins:
                        LOGGER.info(
                            f"Executing plugin: {Path(module.__file__).name}")
                        module.bidscoiner_plugin(sesfolder, bidsmap,
                                                 bidssession)

                    # Add the special fieldmap metadata (IntendedFor, B0FieldIdentifier, TE, etc)
                    addmetadata(bidssession, subid, sesid)

                    # Clean-up the temporary unpacked data
                    if unpacked:
                        shutil.rmtree(sesfolder)

    # Re-read the participants_table and store the collected personals in the json sidecar-file
    if participants_tsv.is_file():
        participants_table = pd.read_csv(participants_tsv, sep='\t')
        participants_table.set_index(['participant_id'],
                                     verify_integrity=True,
                                     inplace=True)
    participants_json = participants_tsv.with_suffix('.json')
    participants_dict = {}
    if participants_json.is_file():
        with participants_json.open('r') as json_fid:
            participants_dict = json.load(json_fid)
    if not participants_dict.get('participant_id'):
        participants_dict['participant_id'] = {
            'Description': 'Unique participant identifier'
        }
    if not participants_dict.get(
            'session_id') and 'session_id' in participants_table.columns:
        participants_dict['session_id'] = {'Description': 'Session identifier'}
    newkey = False
    for col in participants_table.columns:
        if col not in participants_dict:
            newkey = True
            participants_dict[col] = dict(
                LongName='Long (unabbreviated) name of the column',
                Description='Description of the column',
                Levels=dict(
                    Key=
                    'Value (This is for categorical variables: a dictionary of possible values (keys) and their descriptions (values))'
                ),
                Units=
                'Measurement units. [<prefix symbol>]<unit symbol> format following the SI standard is RECOMMENDED'
            )

    # Write the collected data to the participant files
    if newkey:
        LOGGER.info(f"Writing subject meta data to: {participants_json}")
        with participants_json.open('w') as json_fid:
            json.dump(participants_dict, json_fid, indent=4)

    LOGGER.info('-------------- FINISHED! ------------')
    LOGGER.info('')

    bidscoin.reporterrors()
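
# Example usage (hypothetical folder names), mirroring the function signature above:
#
#   bidscoiner(rawfolder='/project/sourcedata', bidsfolder='/project/bids', subjects=['sub-001'], force=False)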
Example #6
def bidscoiner_plugin(session: Path, bidsmap: dict, bidsses: Path) -> None:
    """
    This wrapper function around spec2nii converts the MRS data in the session folder and saves it in the bidsfolder.
    Each saved datafile should be accompanied by a json sidecar file. The bidsmap options for this plugin can be found in:

    bidsmap_new['Options']['plugins']['spec2nii2bids']

    :param session:     The full-path name of the subject/session raw data source folder
    :param bidsmap:     The full mapping heuristics from the bidsmap YAML-file
    :param bidsses:     The full-path name of the BIDS output `sub-/ses-` folder
    :return:            Nothing
    """

    # Get the subject identifiers and the BIDS root folder from the bidsses folder
    if bidsses.name.startswith('ses-'):
        bidsfolder = bidsses.parent.parent
        subid = bidsses.parent.name
        sesid = bidsses.name
    else:
        bidsfolder = bidsses.parent
        subid = bidsses.name
        sesid = ''

    # Get started and see what dataformat we have
    options = bidsmap['Options']['plugins']['spec2nii2bids']
    datasource = bids.get_datasource(session, {'spec2nii2bids': options})
    dataformat = datasource.dataformat
    sourcefiles = [file for file in session.rglob('*') if is_sourcefile(file)]
    if not sourcefiles:
        LOGGER.info(f"No {__name__} sourcedata found in: {session}")
        return

    # Read or create a scans_table and tsv-file
    scans_tsv = bidsses / f"{subid}{bids.add_prefix('_',sesid)}_scans.tsv"
    if scans_tsv.is_file():
        scans_table = pd.read_csv(scans_tsv, sep='\t', index_col='filename')
    else:
        scans_table = pd.DataFrame(columns=['acq_time'], dtype='str')
        scans_table.index.name = 'filename'

    # Loop over all MRS source data files and convert them to BIDS
    for sourcefile in sourcefiles:

        # Get a data source, a matching run from the bidsmap
        datasource = bids.DataSource(sourcefile, {'spec2nii2bids': options})
        run, index = bids.get_matching_run(datasource, bidsmap, runtime=True)

        # Check if we should ignore this run
        if datasource.datatype in bidsmap['Options']['bidscoin'][
                'ignoretypes']:
            LOGGER.info(f"Leaving out: {sourcefile}")
            continue

        # Check that we know this run
        if index is None:
            LOGGER.error(
                f"Skipping unknown '{datasource.datatype}' run: {sourcefile}\n-> Re-run the bidsmapper and delete the MRS output data in {bidsses} to solve this warning"
            )
            continue

        LOGGER.info(f"Processing: {sourcefile}")

        # Create the BIDS session/datatype output folder
        outfolder = bidsses / datasource.datatype
        outfolder.mkdir(parents=True, exist_ok=True)

        # Compose the BIDS filename using the matched run
        bidsname = bids.get_bidsname(subid, sesid, run, runtime=True)
        runindex = run['bids'].get('run', '')
        if runindex.startswith('<<') and runindex.endswith('>>'):
            bidsname = bids.increment_runindex(outfolder, bidsname)
        jsonfile = (outfolder / bidsname).with_suffix('.json')

        # Check if file already exists (-> e.g. when a static runindex is used)
        if jsonfile.is_file():
            LOGGER.warning(
                f"{outfolder/bidsname}.* already exists and will be deleted -- check your results carefully!"
            )
            for ext in ('.nii.gz', '.nii', '.json', '.bval', '.bvec',
                        '.tsv.gz'):
                (outfolder / bidsname).with_suffix(ext).unlink(missing_ok=True)

        # Run spec2nii to convert the source-files in the run folder to nifti's in the BIDS-folder
        arg = ''
        args = options.get('args', OPTIONS['args'])
        if args is None:
            args = ''
        if dataformat == 'SPAR':
            dformat = 'philips'
            arg = f'"{sourcefile.with_suffix(".SDAT")}"'
        elif dataformat == 'Twix':
            dformat = 'twix'
            arg = '-e image'
        elif dataformat == 'Pfile':
            dformat = 'ge'
        else:
            LOGGER.exception(f"Unsupported dataformat: {dataformat}")
            continue
        command = options.get("command", "spec2nii")
        if not bidscoin.run_command(
                f'{command} {dformat} -j -f "{bidsname}" -o "{outfolder}" {args} {arg} "{sourcefile}"'
        ):
            if not list(outfolder.glob(f"{bidsname}.nii*")): continue

        # Load and adapt the newly produced json sidecar-file (NB: assumes every nifti-file comes with a json-file)
        with jsonfile.open('r') as json_fid:
            jsondata = json.load(json_fid)

        # Copy over the source meta-data
        metadata = bids.copymetadata(sourcefile, outfolder / bidsname,
                                     options.get('meta', []))
        for metakey, metaval in metadata.items():
            if metakey in jsondata and jsondata.get(metakey) != metaval:
                LOGGER.warning(
                    f"Replacing {metakey} values in {jsonfile}: {jsondata[metakey]} -> {metaval}"
                )
            jsondata[metakey] = metaval

        # Add all the meta data to the json-file
        for metakey, metaval in run['meta'].items():
            metaval = datasource.dynamicvalue(metaval,
                                              cleanup=False,
                                              runtime=True)
            try:
                metaval = ast.literal_eval(str(metaval))
            except (ValueError, SyntaxError):
                pass
            LOGGER.info(f"Adding '{metakey}: {metaval}' to: {jsonfile}")
            if not metaval:
                metaval = None
            jsondata[metakey] = metaval

        # Save the meta data to disk
        with jsonfile.open('w') as json_fid:
            json.dump(jsondata, json_fid, indent=4)

        # Parse the acquisition time from the source header or else from the json file (NB: assuming the source file represents the first acquisition)
        if datasource.datatype not in bidsmap['Options']['bidscoin'][
                'bidsignore'] and not run['bids'][
                    'suffix'] in bids.get_derivatives(datasource.datatype):
            acq_time = ''
            if dataformat == 'SPAR':
                acq_time = datasource.attributes('scan_date')
            elif dataformat == 'Twix':
                acq_time = f"{datasource.attributes('AcquisitionDate')}T{datasource.attributes('AcquisitionTime')}"
            elif dataformat == 'Pfile':
                acq_time = f"{datasource.attributes('rhr_rh_scan_date')}T{datasource.attributes('rhr_rh_scan_time')}"
            if not acq_time or acq_time == 'T':
                acq_time = f"1925-01-01T{jsondata.get('AcquisitionTime','')}"
            try:
                acq_time = dateutil.parser.parse(acq_time)
                if options.get('anon', OPTIONS['anon']) in ('y', 'yes'):
                    acq_time = acq_time.replace(
                        year=1925, month=1,
                        day=1)  # Privacy protection (see BIDS specification)
                acq_time = acq_time.isoformat()
            except Exception as jsonerror:
                LOGGER.warning(
                    f"Could not parse the acquisition time from: {sourcefile}\n{jsonerror}"
                )
                acq_time = 'n/a'
            scans_table.loc[jsonfile.with_suffix('.nii.gz').
                            relative_to(bidsses).as_posix(),
                            'acq_time'] = acq_time

    # Write the scans_table to disk
    LOGGER.info(f"Writing acquisition time data to: {scans_tsv}")
    scans_table.sort_values(by=['acq_time', 'filename'], inplace=True)
    scans_table.replace('', 'n/a').to_csv(scans_tsv,
                                          sep='\t',
                                          encoding='utf-8',
                                          na_rep='n/a')

    # Collect personal data from a source header
    personals = {}
    if sesid and 'session_id' not in personals:
        personals['session_id'] = sesid
    age = ''
    if dataformat == 'Twix':
        personals['sex'] = datasource.attributes('PatientSex')
        personals['size'] = datasource.attributes('PatientSize')
        personals['weight'] = datasource.attributes('PatientWeight')
        age = datasource.attributes(
            'PatientAge'
        )  # A string of characters with one of the following formats: nnnD, nnnW, nnnM, nnnY
    elif dataformat == 'Pfile':
        sex = datasource.attributes('rhe_patsex')
        if sex == '0': personals['sex'] = 'O'
        elif sex == '1': personals['sex'] = 'M'
        elif sex == '2': personals['sex'] = 'F'
        age = dateutil.parser.parse(
            datasource.attributes('rhr_rh_scan_date')) - dateutil.parser.parse(
                datasource.attributes('rhe_dateofbirth'))
        age = str(age.days) + 'D'
    if age.endswith('D'): age = float(age.rstrip('D')) / 365.2524
    elif age.endswith('W'): age = float(age.rstrip('W')) / 52.1775
    elif age.endswith('M'): age = float(age.rstrip('M')) / 12
    elif age.endswith('Y'): age = float(age.rstrip('Y'))
    if age and options.get('anon', OPTIONS['anon']) in ('y', 'yes'):
        age = int(float(age))
    personals['age'] = str(age)

    # Store the collected personals in the participants_table
    participants_tsv = bidsfolder / 'participants.tsv'
    if participants_tsv.is_file():
        participants_table = pd.read_csv(participants_tsv, sep='\t', dtype=str)
        participants_table.set_index(['participant_id'],
                                     verify_integrity=True,
                                     inplace=True)
    else:
        participants_table = pd.DataFrame()
        participants_table.index.name = 'participant_id'
    if subid in participants_table.index and 'session_id' in participants_table.keys(
    ) and participants_table.loc[subid, 'session_id']:
        return  # Only take data from the first session -> BIDS specification
    for key in personals:  # TODO: Check that only values that are consistent over sessions go in the participants.tsv file, otherwise put them in a sessions.tsv file
        if key not in participants_table or participants_table[key].isnull(
        ).get(subid, True) or participants_table[key].get(subid) == 'n/a':
            participants_table.loc[subid, key] = personals[key]

    # Write the collected data to the participants tsv-file
    LOGGER.info(f"Writing {subid} subject data to: {participants_tsv}")
    participants_table.replace('', 'n/a').to_csv(participants_tsv,
                                                 sep='\t',
                                                 encoding='utf-8',
                                                 na_rep='n/a')
Example #7
def bidscoiner_plugin(session: Path, bidsmap: dict, bidsses: Path) -> None:
    """
    This wrapper function around phys2bids converts the physio data in the session folder and saves it in the bidsfolder.
    Each saved datafile should be accompanied by a json sidecar file. The bidsmap options for this plugin can be found in:

    bidsmap_new['Options']['plugins']['phys2bidscoin']

    See also the dcm2niix2bids plugin for reference implementation

    :param session:     The full-path name of the subject/session raw data source folder
    :param bidsmap:     The full mapping heuristics from the bidsmap YAML-file
    :param bidsses:     The full-path name of the BIDS output `sub-/ses-` folder
    :return:            Nothing
    """

    # Get the subject identifiers and the BIDS root folder from the bidsses folder
    if bidsses.name.startswith('ses-'):
        bidsfolder = bidsses.parent.parent
        subid = bidsses.parent.name
        sesid = bidsses.name
    else:
        bidsfolder = bidsses.parent
        subid = bidsses.name
        sesid = ''

    # Get started
    plugin = {'phys2bidscoin': bidsmap['Options']['plugins']['phys2bidscoin']}
    datasource = bids.get_datasource(session, plugin)
    sourcefiles = [file for file in session.rglob('*') if is_sourcefile(file)]
    if not sourcefiles:
        LOGGER.info(f"No {__name__} sourcedata found in: {session}")
        return

    # Loop over all source data files and convert them to BIDS
    for sourcefile in sourcefiles:

        # Get a data source, a matching run from the bidsmap
        datasource = bids.DataSource(sourcefile, plugin, datasource.dataformat)
        run, match = bids.get_matching_run(datasource, bidsmap, runtime=True)

        # Check if we should ignore this run
        if datasource.datatype in bidsmap['Options']['bidscoin'][
                'ignoretypes']:
            LOGGER.info(f"Leaving out: {sourcefile}")
            continue

        # Check that we know this run
        if not match:
            LOGGER.error(
                f"Skipping unknown '{datasource.datatype}' run: {sourcefile}\n-> Re-run the bidsmapper and delete the physiological output data in {bidsses} to solve this warning"
            )
            continue

        LOGGER.info(f"Processing: {sourcefile}")

        # Get an ordered list of the func runs from the scans.tsv file (which should have a standardized datetime format)
        scans_tsv = bidsses / f"{subid}{bids.add_prefix('_', sesid)}_scans.tsv"
        if scans_tsv.is_file():
            scans_table = pd.read_csv(scans_tsv,
                                      sep='\t',
                                      index_col='filename')
            scans_table.sort_values(by=['acq_time', 'filename'], inplace=True)
        else:
            LOGGER.error(
                f"Could not read the TR's for phys2bids due to a missing '{scans_tsv}' file"
            )
            continue
        funcscans = []
        for index, row in scans_table.iterrows():
            if index.startswith('func/'):
                funcscans.append(index)

        # Then read the TR's from the associated func sidecar files
        tr = []
        for funcscan in funcscans:
            # Strip the '.nii(.gz)' extension before appending '.json'
            with (bidsses / funcscan).with_suffix('').with_suffix(
                    '.json').open('r') as json_fid:
                jsondata = json.load(json_fid)
            tr.append(jsondata['RepetitionTime'])
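        # NB: this list of TR's is passed to the phys2bids call below (tr=tr); presumably phys2bids uses it, together with the expected number of timepoints, to associate the recording with the func runs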

        # Create a heuristic function for phys2bids
        heur_str = ('def heur(physinfo, run=""):\n'
                    '    info = {}\n'
                    f'    if physinfo == "{sourcefile.name}":')
        for key, val in run['bids'].items():
            heur_str = (f'{heur_str}' f'\n        info["{key}"] = "{val}"')
        heur_str = f'{heur_str}\n    return info'
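        # The generated heuristic roughly looks like this (illustrative file/entity names):
        #   def heur(physinfo, run=""):
        #       info = {}
        #       if physinfo == "sub-01_task-rest_physio.acq":
        #           info["task"] = "rest"
        #       return info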

        # Write heuristic function as file in temporary folder
        heur_file = Path(
            tempfile.mkdtemp()) / f'heuristic_sub-{subid}_ses-{sesid}.py'
        heur_file.write_text(heur_str)

        # Run phys2bids
        physiofiles = phys2bids(
            filename=str(sourcefile),
            outdir=str(bidsfolder),
            heur_file=str(heur_file),
            sub=subid,
            ses=sesid,
            chtrig=int(run['meta'].get('TriggerChannel', 0)),
            num_timepoints_expected=run['meta'].get('ExpectedTimepoints',
                                                    None),
            tr=tr,
            pad=run['meta'].get('Pad', 9),
            ch_name=run['meta'].get('ChannelNames', []),
            yml='',
            debug=True,
            quiet=False)

        # Add user-specified meta-data to the newly produced json files (NB: assumes every physio-file comes with a json-file)
        for physiofile in physiofiles:
            jsonfile = Path(physiofile).with_suffix('.json')
            if not jsonfile.is_file():
                LOGGER.error(
                    f"Could not find the expected json sidecar-file: '{jsonfile}'"
                )
                continue
            with jsonfile.open('r') as json_fid:
                jsondata = json.load(json_fid)
            for metakey, metaval in run['meta'].items():
                metaval = datasource.dynamicvalue(metaval,
                                                  cleanup=False,
                                                  runtime=True)
                try:
                    metaval = ast.literal_eval(str(metaval))
                except (ValueError, SyntaxError):
                    pass
                LOGGER.info(f"Adding '{metakey}: {metaval}' to: {jsonfile}")
                if not metaval:
                    metaval = None
                jsondata[metakey] = metaval
            with jsonfile.open('w') as json_fid:
                json.dump(jsondata, json_fid, indent=4)
def bidsmapper_plugin(session: Path, bidsmap_new: dict, bidsmap_old: dict,
                      template: dict, store: dict) -> None:
    """
    All the logic to map the Nibabel header fields onto BIDS labels goes into this function

    :param session:     The full-path name of the subject/session raw data source folder
    :param bidsmap_new: The new study bidsmap that we are building
    :param bidsmap_old: The previous study bidsmap that has precedence over the template bidsmap
    :param template:    The template bidsmap with the default heuristics
    :param store:       The paths of the source- and target-folder
    :return:            Nothing
    """

    # Get started
    plugin = {
        'nibabel2bids': bidsmap_new['Options']['plugins']['nibabel2bids']
    }
    datasource = bids.get_datasource(session, plugin, recurse=2)
    if not datasource.dataformat:
        return
    if not (template[datasource.dataformat]
            or bidsmap_old[datasource.dataformat]):
        LOGGER.error(
            f"No {datasource.dataformat} source information found in the bidsmap and template"
        )
        return

    # Collect the different Nibabel-readable source files for all runs in the session
    for sourcefile in [
            file for file in session.rglob('*') if is_sourcefile(file)
    ]:

        # See if we can find a matching run in the old bidsmap
        datasource = bids.DataSource(sourcefile, plugin, datasource.dataformat)
        run, match = bids.get_matching_run(datasource, bidsmap_old)

        # If not, see if we can find a matching run in the template
        if not match:
            run, _ = bids.get_matching_run(datasource, template)

        # See if we have collected the run somewhere in our new bidsmap
        if not bids.exist_run(bidsmap_new, '', run):

            # Communicate with the user if the run was not present in bidsmap_old or in template, i.e. that we found a new sample
            if not match:
                LOGGER.info(
                    f"Discovered '{datasource.datatype}' {datasource.dataformat} sample: {sourcefile}"
                )

            # Now work from the provenance store
            if store:
                targetfile = store['target'] / sourcefile.relative_to(
                    store['source'])
                targetfile.parent.mkdir(parents=True, exist_ok=True)
                run['provenance'] = str(shutil.copy2(sourcefile, targetfile))

            # Copy the filled-in run over to the new bidsmap
            bids.append_run(bidsmap_new, run)

        else:
            # Communicate with the user if the run was already present in bidsmap_old or in template
            LOGGER.debug(
                f"Known '{datasource.datatype}' {datasource.dataformat} sample: {sourcefile}"
            )