Example #1
def anon_acqtimes(dset_dir):
    """
    Anonymize acquisition datetimes for a dataset.

    Works for both longitudinal and cross-sectional studies. The time of day
    is preserved, but the date of the first scan is set to January 1st, 1800.
    In a longitudinal study, each session is
    anonymized relative to the first session, so that time between sessions is
    preserved.

    Overwrites scan tsv files in dataset. Only run this *after* data collection
    is complete for the study, especially if it's longitudinal.

    Parameters
    ----------
    dset_dir : str
        Path to BIDS dataset to be anonymized.
    """
    bl_dt = parser.parse('1800-01-01')

    layout = BIDSLayout(dset_dir)
    subjects = layout.get_subjects()
    sessions = sorted(layout.get_sessions())

    for sub in subjects:
        if not sessions:
            scans_file = op.join(dset_dir, f'sub-{sub}/sub-{sub}_scans.tsv')
            df = pd.read_csv(scans_file, sep='\t')
            first_scan = df['acq_time'].min()
            first_dt = parser.parse(first_scan.split('T')[0])
            diff = first_dt - bl_dt
            acq_times = df['acq_time'].apply(parser.parse)
            acq_times = (acq_times - diff).astype(str)
            df['acq_time'] = acq_times
            # df.to_csv(scans_file, sep='\t', index=False)
        else:
            # Separated from dataset sessions in case subject missed some
            sub_ses = sorted(layout.get_sessions(subject=sub))
            for i, ses in enumerate(sub_ses):
                scans_file = op.join(
                    dset_dir,
                    f'sub-{sub}/ses-{ses}/sub-{sub}_ses-{ses}_scans.tsv')
                df = pd.read_csv(scans_file, sep='\t')
                if i == 0:
                    # Anonymize in terms of first scan for subject.
                    first_scan = df['acq_time'].min()
                    first_dt = parser.parse(first_scan.split('T')[0])
                    diff = first_dt - bl_dt

                acq_times = df['acq_time'].apply(parser.parse)
                acq_times = (acq_times - diff).astype(str)
                df['acq_time'] = acq_times
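
A minimal usage sketch for the function above; the imports and the dataset path are assumptions, not part of the original snippet. Note that, as excerpted, the df.to_csv(...) write is commented out, so the scans.tsv files are not actually overwritten as shown.

import os.path as op

import pandas as pd
from bids import BIDSLayout
from dateutil import parser

# Illustrative path; run only after data collection is complete.
anon_acqtimes('/path/to/bids_dataset')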
Example #2
    def read_BIDS_coordinates(BIDS_path):
        """from BIDS_path np array coordinate arrays are read and returned in list respective to subjects
        
        Args:
            BIDS_path (string): absolute BIDS path
        
        Returns:
            coord_arr (np array): array with shape (len(subjects), 4), where indexes in the following order: left ecog, left stn, right ecog, right stn,
            coord_arr_names (np array): array with shape  (len(subjects), 2), where coord names are saved in order: left, right
        """

        layout = BIDSLayout(BIDS_path)
        subjects = layout.get_subjects()
        sessions = layout.get_sessions()
        coord_arr = np.empty(
            (len(subjects), 4),
            dtype=object)  # left ecog, left stn, right ecog, right stn
        coord_arr_names = np.empty((len(subjects), 2), dtype=object)

        for subject_idx, subject in enumerate(subjects):
            for sess in sessions:

                coord_path = os.path.join(BIDS_path, 'sub-' + subject,
                                          'ses-' + sess, 'eeg',
                                          'sub-' + subject + '_electrodes.tsv')

                print(coord_path)
                if not os.path.exists(coord_path):
                    continue
                df = pd.read_csv(coord_path, sep="\t")

                if sess == 'left':
                    if df['name'].str.contains("ECOG").any():
                        # [1:4] due to bipolar referencing (first electrode missing)
                        coord_arr[subject_idx, 0] = np.array(
                            df[df['name'].str.contains("ECOG")])[:, 1:4].astype(float)
                    if df['name'].str.contains("STN").any():
                        coord_arr[subject_idx, 1] = np.array(
                            df[df['name'].str.contains("STN")])[:, 1:4].astype(float)
                    coord_arr_names[subject_idx, 0] = list(df['name'])
                elif sess == 'right':
                    if df['name'].str.contains("ECOG").any():
                        coord_arr[subject_idx, 2] = np.array(
                            df[df['name'].str.contains("ECOG")])[:, 1:4].astype(float)
                    if df['name'].str.contains("STN").any():
                        coord_arr[subject_idx, 3] = np.array(
                            df[df['name'].str.contains("STN")])[:, 1:4].astype(float)
                    coord_arr_names[subject_idx, 1] = list(df['name'])

        return coord_arr, coord_arr_names
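
The method above is indented as if it were defined inside a class; a minimal sketch of calling the same logic as a plain function, assuming the imports below and a dataset with ses-left / ses-right sessions containing *_electrodes.tsv files (the path is illustrative):

import os

import numpy as np
import pandas as pd
from bids import BIDSLayout

# Illustrative path; returns per-subject coordinate arrays and channel-name lists.
coord_arr, coord_arr_names = read_BIDS_coordinates('/path/to/bids_dataset')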
Example #3
def test_dcm2bids():
    # tmpBase = os.path.join(TEST_DATA_DIR, "tmp")
    # bidsDir = TemporaryDirectory(dir=tmpBase)
    bidsDir = TemporaryDirectory()

    tmpSubDir = os.path.join(bidsDir.name, DEFAULT.tmpDirName, "sub-01")
    shutil.copytree(os.path.join(TEST_DATA_DIR, "sidecars"), tmpSubDir)

    app = Dcm2bids(
        [TEST_DATA_DIR],
        "01",
        os.path.join(TEST_DATA_DIR, "config_test.json"),
        bidsDir.name,
    )
    app.run()
    layout = BIDSLayout(bidsDir.name, validate=False)

    assert layout.get_subjects() == ["01"]
    assert layout.get_sessions() == []
    assert layout.get_tasks() == ["rest"]
    assert layout.get_runs() == [1, 2, 3]

    app = Dcm2bids(
        [TEST_DATA_DIR],
        "01",
        os.path.join(TEST_DATA_DIR, "config_test.json"),
        bidsDir.name,
    )
    app.run()

    fmapFile = os.path.join(bidsDir.name, "sub-01", "fmap",
                            "sub-01_echo-492_fmap.json")
    data = load_json(fmapFile)
    fmapMtime = os.stat(fmapFile).st_mtime
    assert data["IntendedFor"] == "dwi/sub-01_dwi.nii.gz"

    data = load_json(
        os.path.join(bidsDir.name, "sub-01", "localizer",
                     "sub-01_run-01_localizer.json"))
    assert data["ProcedureStepDescription"] == "Modify by dcm2bids"

    # rerun
    shutil.rmtree(tmpSubDir)
    shutil.copytree(os.path.join(TEST_DATA_DIR, "sidecars"), tmpSubDir)

    app = Dcm2bids(
        [TEST_DATA_DIR],
        "01",
        os.path.join(TEST_DATA_DIR, "config_test.json"),
        bidsDir.name,
    )
    app.run()

    fmapMtimeRerun = os.stat(fmapFile).st_mtime
    assert fmapMtime == fmapMtimeRerun

    if os.name != 'nt':
        bidsDir.cleanup()
Example #4
def test_dcm2bids():
    tmpBase = os.path.join(TEST_DATA_DIR, "tmp")
    # bidsDir = TemporaryDirectory(dir=tmpBase)
    bidsDir = TemporaryDirectory()

    tmpSubDir = os.path.join(bidsDir.name, DEFAULT.tmpDirName, "sub-01")
    shutil.copytree(
            os.path.join(TEST_DATA_DIR, "sidecars"),
            tmpSubDir)

    app = Dcm2bids(
            [TEST_DATA_DIR], "01",
            os.path.join(TEST_DATA_DIR, "config_test.json"),
            bidsDir.name
            )
    app.run()
    layout = BIDSLayout(bidsDir.name, validate=False)

    assert layout.get_subjects() == ["01"]
    assert layout.get_sessions() == []
    assert layout.get_tasks() == ["rest"]
    assert layout.get_runs() == [1, 2, 3]

    app = Dcm2bids(
            [TEST_DATA_DIR], "01",
            os.path.join(TEST_DATA_DIR, "config_test.json"),
            bidsDir.name
            )
    app.run()


    fmapFile = os.path.join(
            bidsDir.name, "sub-01", "fmap", "sub-01_echo-492_fmap.json")
    data = load_json(fmapFile)
    fmapMtime = os.stat(fmapFile).st_mtime
    assert data["IntendedFor"] == "dwi/sub-01_dwi.nii.gz"

    data = load_json(os.path.join(
        bidsDir.name, "sub-01", "localizer", "sub-01_run-01_localizer.json"))
    assert data["ProcedureStepDescription"] == "Modify by dcm2bids"

    # rerun
    shutil.rmtree(tmpSubDir)
    shutil.copytree(
            os.path.join(TEST_DATA_DIR, "sidecars"),
            tmpSubDir)

    app = Dcm2bids(
            [TEST_DATA_DIR], "01",
            os.path.join(TEST_DATA_DIR, "config_test.json"),
            bidsDir.name
            )
    app.run()

    fmapMtimeRerun = os.stat(fmapFile).st_mtime
    assert fmapMtime == fmapMtimeRerun

    bidsDir.cleanup()
Example #5
def _fill_empty_lists(layout: BIDSLayout, subjects: list, tasks: list, sessions: list, runs: t.List[str]):
    """
    If filters are not provided by the user, load them from layout.
    """

    subjects = subjects if subjects else layout.get_subjects()
    tasks = tasks if tasks else layout.get_tasks()
    sessions = sessions if sessions else layout.get_sessions()
    runs = runs if runs else layout.get_runs()
    return subjects, tasks, sessions, runs
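
A minimal sketch of how this helper might be called; the imports follow the snippet's annotations and the dataset path is illustrative:

import typing as t

from bids import BIDSLayout

layout = BIDSLayout('/path/to/bids_dataset')  # illustrative path
# Empty filter lists fall back to whatever the layout contains.
subjects, tasks, sessions, runs = _fill_empty_lists(layout, [], ['rest'], [], [])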
Example #6
def test_generate_bids_skeleton(tmp_path, test_id, json_layout, n_files,
                                n_subjects, n_sessions):
    root = tmp_path / test_id
    generate_bids_skeleton(root, json_layout)
    datadesc = root / "dataset_description.json"
    assert datadesc.exists()
    assert "BIDSVersion" in datadesc.read_text()

    assert len([x for x in root.glob("**/*") if x.is_file()]) == n_files

    # ensure layout is valid
    layout = BIDSLayout(root)
    assert len(layout.get_subjects()) == n_subjects
    assert len(layout.get_sessions()) == n_sessions

    anat = layout.get(suffix="T1w", extension="nii.gz")[0]
    bold = layout.get(suffix="bold", extension="nii.gz")[0]
    assert anat.get_metadata()
    assert bold.get_metadata()
Example #7
def main(**args):

    path = "/mnt/DATA_4Tera/Dati_Sherlock/bids/"

    derivatives = os.path.join(path, "derivatives", "afniproc")
    print("mkdir -p {}".format(derivatives))
    os.system("mkdir -p {}".format(derivatives))

    subj_deriv = os.path.join(derivatives, 'sub-{subject}', "ses-{session}")

    layout = BIDSLayout(path)

    subjects = layout.get_subjects()
    sessions = layout.get_sessions()

    for session in sessions:
        for subj in subjects:

            deriv_dir = subj_deriv.format(session=session, subject=subj)
            print("mkdir -p {}".format(deriv_dir))
            os.system("mkdir -p {}".format(deriv_dir))

            # Create anat and func
            anat_dir = os.path.join(subj_deriv,
                                    "{datatype}").format(session=session,
                                                         subject=subj,
                                                         datatype='anat')
            func_dir = os.path.join(subj_deriv,
                                    "{datatype}").format(session=session,
                                                         subject=subj,
                                                         datatype='func')

            print("mkdir -p {}".format(anat_dir))
            os.system("mkdir -p {}".format(anat_dir))

            print("mkdir -p {}".format(func_dir))
            os.system("mkdir -p {}".format(func_dir))

            # Check and convert T1 to send Freesurfer segmentation
            t1 = layout.get(subject=subj, session=session, suffix='T1w')[0]

            entities = t1.get_entities()
            pattern = os.path.join(
                subj_deriv, "{datatype}",
                "sub-{subject}[_ses-{session}][_desc-{desc}]_{suffix}.{extension}"
            )
            entities['desc'] = 'fsprep'
            t1_fs = layout.build_path(entities, pattern, validate=False)

            entities['extension'] = 'txt'
            t1_log = layout.build_path(entities, pattern, validate=False)

            command = "check_dset_for_fs.py -input %s -fix_all -fix_out_prefix %s -fix_out_vox_dim 1 -verb > %s"
            command = command % (t1.path, t1_fs, t1_log)
            print(command)
            os.system(command)

            check_fs(layout, subj, session, subj_deriv)

            ### T1 to MNI space coreg
            entities = t1.get_entities()
            entities['desc'] = 'fsprep'
            entities['space'] = 'MNI152'

            pattern = os.path.join(
                subj_deriv, "{datatype}",
                "sub-{subject}[_ses-{session}][_space-{space}][_desc-{desc}]_{suffix}.{extension}"
            )
            t1_mni = layout.build_path(entities, pattern, validate=False)

            command = "@auto_tlrc -base MNI152_2009_template.nii.gz -pad_base 35 -prefix {prefix} -input {input}"
            command = command.format(prefix=t1_mni, input=t1_fs)
            print(command)
            os.system(command)

            runs = layout.get_runs()
            ordered_bold = []
            for run in runs:
                fname = layout.get(session=session,
                                   subject=subj,
                                   run=run,
                                   suffix='bold')
                if len(fname) != 0:
                    ordered_bold.append(fname[0])

            # Slice time correction - motion correction - align EPI to Anat to MNI
            bold = layout.get(subject=subj,
                              session=session,
                              suffix='bold',
                              extension='nii.gz')

            # 1D File
            slice_timing = np.array(bold[0].get_metadata()['SliceTiming'])
            slice_fname = os.path.join(path, "slice_timing.txt")
            np.savetxt(slice_fname,
                       slice_timing,
                       delimiter=' ',
                       newline=' ',
                       fmt='%.5f')

            epi = layout.get(subject=subj,
                             session=session,
                             suffix='bold',
                             extension='nii.gz',
                             run=1)[0].path
            child_epi = " ".join([b.path for b in ordered_bold])

            command = "align_epi_anat.py -anat {anat} -epi {epi} -child_epi {child_epi}"+\
                      " -epi_base 0 -tshift_opts -tpattern {tpattern} -epi2anat -giant_move"+\
                      " -tlrc_apar {tlrc_apar}"
            command = command.format(anat=t1_fs,
                                     epi=epi,
                                     child_epi=child_epi,
                                     tpattern=slice_fname,
                                     tlrc_apar=t1_mni)
            print(command)
            os.system(command)

            # Create mask
            automask = " ".join(
                [b.filename[:-7] + '_tlrc_al+tlrc.HEAD' for b in bold])
            mean_mask_prefix = 'mean_mni.nii.gz'
            command = '3dTstat -prefix {prefix} {input}'.format(
                prefix=mean_mask_prefix, input=automask)
            print(command)
            os.system(command)

            entities = bold[0].get_entities()
            entities['suffix'] = 'mask'
            entities['space'] = 'MNI152'

            pattern = os.path.join(
                subj_deriv, "{datatype}",
                "sub-{subject}[_ses-{session}][_space-{space}][_desc-{desc}]_{suffix}.{extension}"
            )
            mask_prefix = layout.build_path(entities, pattern, validate=False)
            command = '3dAutomask -prefix {prefix} {input}'.format(
                input=mean_mask_prefix, prefix=mask_prefix)
            print(command)
            os.system(command)

            # Clean files
            removed = " ".join([b.filename[:-7] + '_al+orig.*' for b in bold])
            command = "rm " + removed
            print(command)
            os.system(command)

            command = "rm __tt_*.*"
            print(command)
            os.system(command)

            command = "rm malldump.*"
            print(command)
            os.system(command)

            # Put files in BIDS
            header = [
                'trans_x', 'trans_y', 'trans_z', 'rot_x', 'rot_y', 'rot_z'
            ]
            motion_ordered = list()
            for bold in ordered_bold:

                motion_fname = bold.filename[:-7] + "_vr_motion.1D"
                motion = np.genfromtxt(motion_fname)

                motion_ordered.append(motion_fname)

                entities = bold.get_entities()
                entities['suffix'] = 'motion'
                entities['desc'] = 'volreg'
                entities['extension'] = 'tsv'

                pattern = os.path.join(
                    subj_deriv, "{datatype}",
                    "sub-{subject}_ses-{session}_task-{task}_run-{run:02d}_desc-{desc}_{suffix}.{extension}"
                )
                motion_bids = pattern.format(**entities)

                motion = motion[:, [3, 4, 5, 0, 1, 2]]
                np.savetxt(motion_bids,
                           motion,
                           fmt="%f",
                           delimiter="\t",
                           header="\t".join(header))

                print("rm " + motion_fname)
                os.system("rm " + motion_fname)

                for desc in ['mat', "reg_mat", "tlrc_mat"]:
                    affine_fname = bold.filename[:-7] + "_al_" + desc + ".aff12.1D"
                    entities = bold.get_entities()
                    entities['suffix'] = 'affine'
                    entities['desc'] = desc.replace("_", "")
                    entities['extension'] = 'tsv'

                    pattern = os.path.join(
                        subj_deriv, "{datatype}",
                        "sub-{subject}_ses-{session}_task-{task}_run-{run:02d}_desc-{desc}_{suffix}.{extension}"
                    )
                    affine_bids = pattern.format(**entities)

                    command = "mv {0} {1}".format(affine_fname, affine_bids)
                    print(command)
                    os.system(command)

                afni_bold = bold.filename[:-7] + "_tlrc_al+tlrc"
                entities = bold.get_entities()
                entities['desc'] = "afniproc"
                entities['extension'] = 'nii.gz'
                entities['space'] = 'MNI152'
                pattern = os.path.join(
                    subj_deriv, "{datatype}",
                    "sub-{subject}_ses-{session}_task-{task}_run-{run:02d}_space-{space}_desc-{desc}_{suffix}.{extension}"
                )

                afni_bids = pattern.format(**entities)
                command = "3dcopy {0} {1}".format(afni_bold, afni_bids)
                print(command)
                os.system(command)

                print("rm {}*".format(afni_bold))
                os.system("rm {}*".format(afni_bold))

            # Create confound regressors
            # Motion

            motion_files = list()
            for run in runs:
                f = layout.get(subject=subj,
                               session=session,
                               task=session,
                               run=run,
                               suffix='motion')
                if len(f) != 0:
                    motion_files.append(f[0])

            motion_df = [
                pd.read_csv(m.path, delimiter="\t") for m in motion_files
            ]

            motion_demean = [m - m.mean(0) for m in motion_df]
            motion_demean = pd.concat(motion_demean)

            entities = motion_files[0].get_entities()
            entities['desc'] = 'demean'

            pattern = os.path.join(
                subj_deriv, "{datatype}",
                "sub-{subject}_ses-{session}_desc-{desc}_{suffix}.{extension}")
            demean_fname = pattern.format(**entities)
            motion_demean.to_csv(demean_fname,
                                 header=False,
                                 index=False,
                                 sep="\t")

            motion_deriv = [m.diff() for m in motion_df]
            motion_deriv = [m.fillna(0) for m in motion_deriv]
            motion_deriv = [m - m.mean(0) for m in motion_deriv]
            motion_deriv = pd.concat(motion_deriv)

            entities['desc'] = 'deriv'
            deriv_fname = pattern.format(**entities)
            motion_deriv.to_csv(deriv_fname,
                                header=False,
                                index=False,
                                sep="\t")

            tr_counts = [m.shape[0] for m in motion_df]

            for j, t in enumerate(tr_counts):
                command = "1dBport -nodata {ntr} 1 -band 0.01 999 -invert -nozero > bpass.1D".format(
                    ntr=t)
                print(command)
                os.system(command)

                command = "1d_tool.py -infile bpass.1D -pad_into_many_runs {run:1d} {n_runs}"+\
                          " -set_run_lengths {tr_counts} -write bpass.{run:02d}.1D"
                command = command.format(run=j + 1,
                                         n_runs=str(len(tr_counts)),
                                         tr_counts=" ".join(
                                             [str(t) for t in tr_counts]))
                print(command)
                os.system(command)

            entities['desc'] = 'bpass'
            entities['suffix'] = 'timeseries'
            entities['extension'] = '1D'
            bpass_fname = pattern.format(**entities)

            command = "1dcat bpass.*.1D > {}".format(bpass_fname)
            print(command)
            os.system(command)

            command = "rm bpass*"
            print(command)
            os.system(command)
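
The entities/pattern idiom used throughout the example above can be exercised on its own. A minimal sketch, assuming an illustrative dataset path and entity values; BIDSLayout.build_path accepts a pattern string and validate=False, as in the code above:

from bids import BIDSLayout

layout = BIDSLayout('/path/to/bids_dataset', validate=False)  # illustrative path
pattern = "sub-{subject}[_ses-{session}][_desc-{desc}]_{suffix}.{extension}"
entities = {'subject': '01', 'session': '01', 'desc': 'fsprep',
            'suffix': 'T1w', 'extension': 'nii.gz'}
print(layout.build_path(entities, pattern, validate=False))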
Example #8
def main(**args):

    outfiles = ['fitts', 'errts', 'stats', 'betas']

    path = args['path']
    pipeline = args['pipeline']

    command = '3dDeconvolve -input {files} -jobs {n_jobs} -polort {polort} -float {confounds} {events_string} '+ \
              ' -mask {mask} -allzero_OK -fout -tout -x1D {design_matrix_txt} -xjpeg {design_matrix_jpg} -xsave '+\
              '-fitts {fitts} -errts {errts} -bucket {stats} -cbucket {betas} -rout -gltsym "SYM: RESP+L -RESP+R" -glt_label 1 RespLvsRespR'

    extra_event = process_extraevent_arg(args['extra_event'])

    derivatives = os.path.join(path, "derivatives", pipeline)
    print("mkdir -p {}".format(derivatives))
    os.system("mkdir -p {}".format(derivatives))

    derivatives_pattern = os.path.join(derivatives, 'sub-{subject}',
                                       "ses-{session}")
    pattern = os.path.join(
        derivatives_pattern, "{datatype}",
        "sub-{subject}[_ses-{session}][_space-{space}][_desc-{desc}]_{suffix}.{extension}"
    )

    layout = BIDSLayout(path, derivatives=True)

    subjects = layout.get_subjects()
    subjects.remove('lormat')
    sessions = layout.get_sessions()

    # TODO: Check if there are sessions
    for session in sessions:
        for subj in subjects:

            deriv_dir = derivatives_pattern.format(session=session,
                                                   subject=subj)
            print("mkdir -p {}".format(deriv_dir))
            os.system("mkdir -p {}".format(deriv_dir))

            # Create func
            func_dir = os.path.join(derivatives_pattern,
                                    "{datatype}").format(session=session,
                                                         subject=subj,
                                                         datatype='func')

            print("mkdir -p {}".format(func_dir))
            os.system("mkdir -p {}".format(func_dir))

            # Main command
            files = layout.get(subject=subj,
                               session=session,
                               task=session,
                               desc='afniproc',
                               extension='nii.gz')
            entities = files[0].get_entities()
            files = " ".join(f.path for f in files)

            args['files'] = files

            confounds = ''
            for desc in ['bpass', 'demean']:
                ort_files = layout.get(subject=subj,
                                       session=session,
                                       desc=desc)
                confounds += '-ortvec {} {} '.format(ort_files[0].path, desc)

            args['confounds'] = confounds

            # Stimuli
            stims = bids2afni_events(subj,
                                     session,
                                     layout,
                                     pattern,
                                     extra_event=extra_event)

            write_afni(stims)
            args['events_string'] = stims_times(stims)

            # Mask
            mask = layout.get(subject=subj,
                              session=session,
                              suffix='mask',
                              extension='nii.gz')
            args['mask'] = mask[0].path

            # Buckets
            for desc in outfiles:
                entities['desc'] = pipeline
                entities['suffix'] = desc
                args[desc] = layout.build_path(entities,
                                               pattern,
                                               validate=False)

            for extension in ['jpg', 'txt']:
                entities['suffix'] = 'dmatrix'
                entities['extension'] = extension
                args['design_matrix_' + extension] = layout.build_path(
                    entities, pattern, validate=False)

            print(command.format(**args))
            os.system(command.format(**args))
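
The example above relies on pybids indexing the derivatives tree alongside the raw dataset, so queries on desc can find the afniproc outputs. A minimal sketch of that query style, with an illustrative path and entity values:

from bids import BIDSLayout

layout = BIDSLayout('/path/to/bids_dataset', derivatives=True)  # also indexes derivatives/
bold_runs = layout.get(subject='01', desc='afniproc', extension='nii.gz')
print([f.path for f in bold_runs])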
Example #9
def bidsmri2project(directory, args):

    # initialize empty cde graph...it may get replaced if we're doing variable to term mapping or not
    cde=Graph()

    # Parse dataset_description.json file in BIDS directory
    if os.path.isdir(directory):
        try:
            with open(os.path.join(directory,'dataset_description.json')) as data_file:
                dataset = json.load(data_file)
        except OSError:
            logging.critical("Cannot find dataset_description.json file which is required in the BIDS spec")
            exit("-1")
    else:
        logging.critical("Error: BIDS directory %s does not exist!" %os.path.join(directory))
        exit("-1")

    # create project / nidm-exp doc
    project = Project()

    # if there are git annex sources then add them
    num_sources=addGitAnnexSources(obj=project.get_uuid(),bids_root=directory)
    # else just add the local path to the dataset
    if num_sources == 0:
        project.add_attributes({Constants.PROV['Location']:"file:/" + directory})


    # add various attributes if they exist in BIDS dataset
    for key in dataset:
        # if key from dataset_description file is mapped to term in BIDS_Constants.py then add to NIDM object
        if key in BIDS_Constants.dataset_description:
            if type(dataset[key]) is list:
                project.add_attributes({BIDS_Constants.dataset_description[key]:"".join(dataset[key])})
            else:
                project.add_attributes({BIDS_Constants.dataset_description[key]:dataset[key]})




    # get BIDS layout
    bids_layout = BIDSLayout(directory)


    # create empty dictionary for sessions; key is subject id, used later to link scans to the same session as demographics
    session={}
    participant={}
    # Parse participants.tsv file in BIDS directory and create study and acquisition objects
    if os.path.isfile(os.path.join(directory,'participants.tsv')):
        with open(os.path.join(directory,'participants.tsv')) as csvfile:
            participants_data = csv.DictReader(csvfile, delimiter='\t')

            # logic to map variables to terms.
            # first iterate over variables in dataframe and check which ones are already mapped as BIDS constants and which are not.  For those that are not
            # we want to use the variable-term mapping functions to help the user do the mapping
            # iterate over columns
            mapping_list=[]
            column_to_terms={}
            for field in participants_data.fieldnames:

                # column is not in BIDS_Constants
                if not (field in BIDS_Constants.participants):
                    # add column to list for column_to_terms mapping
                    mapping_list.append(field)



            #if user didn't supply a json mapping file but we're doing some variable-term mapping create an empty one for column_to_terms to use
            if args.json_map == False:
                #defaults to participants.json because here we're mapping the participants.tsv file variables to terms
                # if participants.json file doesn't exist then run without json mapping file
                if not os.path.isfile(os.path.join(directory,'participants.json')):
                    #maps variables in CSV file to terms
                    temp=DataFrame(columns=mapping_list)
                    if args.no_concepts:
                        column_to_terms,cde = map_variables_to_terms(directory=directory,assessment_name='participants.tsv',
                            df=temp,output_file=os.path.join(directory,'participants.json'),bids=True,associate_concepts=False)
                    else:
                        column_to_terms,cde = map_variables_to_terms(directory=directory,assessment_name='participants.tsv',
                            df=temp,output_file=os.path.join(directory,'participants.json'),bids=True)
                else:
                    #maps variables in CSV file to terms
                    temp=DataFrame(columns=mapping_list)
                    if args.no_concepts:
                        column_to_terms,cde = map_variables_to_terms(directory=directory, assessment_name='participants.tsv', df=temp,
                            output_file=os.path.join(directory,'participants.json'),json_file=os.path.join(directory,'participants.json'),bids=True,associate_concepts=False)
                    else:
                        column_to_terms,cde = map_variables_to_terms(directory=directory, assessment_name='participants.tsv', df=temp,
                            output_file=os.path.join(directory,'participants.json'),json_file=os.path.join(directory,'participants.json'),bids=True)
            else:
                #maps variables in CSV file to terms
                temp=DataFrame(columns=mapping_list)
                if args.no_concepts:
                    column_to_terms, cde = map_variables_to_terms(directory=directory, assessment_name='participants.tsv', df=temp,
                        output_file=os.path.join(directory,'participants.json'),json_file=args.json_map,bids=True,associate_concepts=False)
                else:
                    column_to_terms, cde = map_variables_to_terms(directory=directory, assessment_name='participants.tsv', df=temp,
                        output_file=os.path.join(directory,'participants.json'),json_file=args.json_map,bids=True)


            for row in participants_data:
                #create session object for subject to be used for participant metadata and image data
                #parse subject id from "sub-XXXX" string
                temp = row['participant_id'].split("-")
                # to handle ambiguity in BIDS datasets: sometimes participant_id is sub-XXXX and other times it's just XXXX
                if len(temp) > 1:
                    subjid = temp[1]
                else:
                    subjid = temp[0]
                logging.info(subjid)
                session[subjid] = Session(project)

                #add acquisition object
                acq = AssessmentAcquisition(session=session[subjid])

                acq_entity = AssessmentObject(acquisition=acq)
                participant[subjid] = {}
                participant[subjid]['person'] = acq.add_person(attributes=({Constants.NIDM_SUBJECTID:row['participant_id']}))

                # add nfo:filename entry to assessment entity to reflect provenance of where this data came from
                acq_entity.add_attributes({Constants.NIDM_FILENAME:getRelPathToBIDS(os.path.join(directory,'participants.tsv'),directory)})
                #acq_entity.add_attributes({Constants.NIDM_FILENAME:os.path.join(directory,'participants.tsv')})

                #add qualified association of participant with acquisition activity
                acq.add_qualified_association(person=participant[subjid]['person'],role=Constants.NIDM_PARTICIPANT)
                # print(acq)

                # if there are git annex sources for participants.tsv file then add them
                num_sources=addGitAnnexSources(obj=acq_entity.get_uuid(),bids_root=directory)
                # else just add the local path to the dataset
                if num_sources == 0:
                    acq_entity.add_attributes({Constants.PROV['Location']:"file:/" + os.path.join(directory,'participants.tsv')})

                 # if there's a JSON sidecar file then create an entity and associate it with all the assessment entities
                if os.path.isfile(os.path.join(directory,'participants.json')):
                    json_sidecar = AssessmentObject(acquisition=acq)
                    json_sidecar.add_attributes({PROV_TYPE:QualifiedName(Namespace("bids",Constants.BIDS),"sidecar_file"), Constants.NIDM_FILENAME:
                        getRelPathToBIDS(os.path.join(directory,'participants.json'),directory)})

                    # add Git Annex Sources
                    # if there are git annex sources for participants.tsv file then add them
                    num_sources=addGitAnnexSources(obj=json_sidecar.get_uuid(),filepath=os.path.join(directory,'participants.json'),bids_root=directory)
                    # else just add the local path to the dataset
                    if num_sources == 0:
                        json_sidecar.add_attributes({Constants.PROV['Location']:"file:/" + os.path.join(directory,'participants.json')})


                # check if json_sidecar entity exists and if so associate assessment entity with it
                if 'json_sidecar' in  locals():
                    #connect json_entity with acq_entity
                    acq_entity.add_attributes({Constants.PROV["wasInfluencedBy"]:json_sidecar})

                for key,value in row.items():
                    if not value:
                        continue
                    #for variables in participants.tsv file who have term mappings in BIDS_Constants.py use those, add to json_map so we don't have to map these if user
                    #supplied arguments to map variables
                    if key in BIDS_Constants.participants:
                        # WIP
                        # Here we are adding to CDE graph data elements for BIDS Constants that remain fixed for each BIDS-compliant dataset

                        if not (BIDS_Constants.participants[key] == Constants.NIDM_SUBJECTID):


                            # create a namespace with the URL for fixed BIDS_Constants term
                            # item_ns = Namespace(str(Constants.BIDS.namespace.uri))
                            # add prefix to namespace which is the BIDS fixed variable name
                            # cde.bind(prefix="bids", namespace=item_ns)
                            # ID for BIDS variables is always the same bids:[bids variable]
                            cde_id = Constants.BIDS[key]
                            # add the data element to the CDE graph
                            cde.add((cde_id,RDF.type, Constants.NIDM['DataElement']))
                            cde.add((cde_id,RDF.type, Constants.PROV['Entity']))
                            # add some basic information about this data element
                            cde.add((cde_id,Constants.RDFS['label'],Literal(BIDS_Constants.participants[key].localpart)))
                            cde.add((cde_id,Constants.NIDM['isAbout'],URIRef(BIDS_Constants.participants[key].uri)))
                            cde.add((cde_id,Constants.NIDM['source_variable'],Literal(key)))
                            cde.add((cde_id,Constants.NIDM['description'],Literal("participant/subject identifier")))
                            cde.add((cde_id,Constants.RDFS['comment'],Literal("BIDS participants_id variable fixed in specification")))
                            cde.add((cde_id,Constants.RDFS['valueType'],URIRef(Constants.XSD["string"])))

                            acq_entity.add_attributes({cde_id:Literal(value)})

                        # if this was the participant_id, we already handled it above creating agent / qualified association
                        # if not (BIDS_Constants.participants[key] == Constants.NIDM_SUBJECTID):
                        #    acq_entity.add_attributes({BIDS_Constants.participants[key]:value})


                    # else if user added -mapvars flag to command line then we'll use the variable-> term mapping procedures to help user map variables to terms (also used
                    # in CSV2NIDM.py)
                    else:

                        # WIP: trying to add new support for CDEs...
                        add_attributes_with_cde(prov_object=acq_entity,cde=cde,row_variable=key,value=value)
                        # if key in column_to_terms:
                        #    acq_entity.add_attributes({QualifiedName(provNamespace(Core.safe_string(None,string=str(key)), column_to_terms[key]["url"]), ""):value})
                        # else:

                        #    acq_entity.add_attributes({Constants.BIDS[key.replace(" ", "_")]:value})


    # create acquisition objects for each scan for each subject

    # loop through all subjects in dataset
    for subject_id in bids_layout.get_subjects():
        logging.info("Converting subject: %s" %subject_id)
        # skip .git directories...added to support datalad datasets
        if subject_id.startswith("."):
            continue

        # check if there are session numbers.  If so, store them in the session activity and create new
        # sessions for these imaging acquisitions.  Because we don't know which imaging session the root
        # participants.tsv file data may be associated with we simply link the imaging acquisitions to different
        # sessions (i.e. the participants.tsv file goes into an AssessmentAcquisition and linked to a unique
        # sessions and the imaging acquisitions go into MRAcquisitions and has a unique session)
        imaging_sessions = bids_layout.get_sessions(subject=subject_id)
        # if session_dirs has entries then get any metadata about session and store in session activity

        # bids_layout.get(subject=subject_id,type='session',extensions='.tsv')
        # bids_layout.get(subject=subject_id,type='scans',extensions='.tsv')
        # bids_layout.get(extensions='.tsv',return_type='obj')

        # loop through each session if there is a sessions directory
        if len(imaging_sessions) > 0:
            for img_session in imaging_sessions:
                # create a new session
                ses = Session(project)
                # add session number as metadata
                ses.add_attributes({Constants.BIDS['session_number']:img_session})
                addimagingsessions(bids_layout=bids_layout,subject_id=subject_id,session=ses,participant=participant, directory=directory,img_session=img_session)
        # else we have no ses-* directories in the BIDS layout
        addimagingsessions(bids_layout=bids_layout,subject_id=subject_id,session=Session(project),participant=participant, directory=directory)



        # Added temporarily to support phenotype files
        # for each *.tsv / *.json file pair in the phenotypes directory
        # WIP: ADD VARIABLE -> TERM MAPPING HERE
        for tsv_file in glob.glob(os.path.join(directory,"phenotype","*.tsv")):
            # for now, open the TSV file, extract the row for this subject, store it in an acquisition object and link to
            # the associated JSON data dictionary file
            with open(tsv_file) as phenofile:
                pheno_data = csv.DictReader(phenofile, delimiter='\t')
                for row in pheno_data:
                    subjid = row['participant_id'].split("-")
                    if not subjid[1] == subject_id:
                        continue
                    else:
                        # add acquisition object
                        acq = AssessmentAcquisition(session=session[subjid[1]])
                        # add qualified association with person
                        acq.add_qualified_association(person=participant[subject_id]['person'],role=Constants.NIDM_PARTICIPANT)

                        acq_entity = AssessmentObject(acquisition=acq)



                        for key,value in row.items():
                            if not value:
                                continue
                            # we're using participant_id in NIDM in agent so don't add to assessment as a triple.
                            # BIDS phenotype files seem to have an index column with no column header variable name so skip those
                            if ((not key == "participant_id") and (key != "")):
                                # for now we're using a placeholder namespace for BIDS and simply the variable names as the concept IDs..
                                acq_entity.add_attributes({Constants.BIDS[key]:value})

                        # link TSV file
                        acq_entity.add_attributes({Constants.NIDM_FILENAME:getRelPathToBIDS(tsv_file,directory)})
                        #acq_entity.add_attributes({Constants.NIDM_FILENAME:tsv_file})

                        # if there are git annex sources for participants.tsv file then add them
                        num_sources=addGitAnnexSources(obj=acq_entity.get_uuid(),bids_root=directory)
                        # else just add the local path to the dataset
                        if num_sources == 0:
                            acq_entity.add_attributes({Constants.PROV['Location']:"file:/" + tsv_file})


                        # link associated JSON file if it exists
                        data_dict = os.path.join(directory,"phenotype",os.path.splitext(os.path.basename(tsv_file))[0]+ ".json")
                        if os.path.isfile(data_dict):
                            # if file exists, create a new entity and associate it with the appropriate activity  and a used relationship
                            # with the TSV-related entity
                            json_entity = AssessmentObject(acquisition=acq)
                            json_entity.add_attributes({PROV_TYPE:Constants.BIDS["sidecar_file"], Constants.NIDM_FILENAME:
                                getRelPathToBIDS(data_dict,directory)})

                            # add Git Annex Sources
                            # if there are git annex sources for participants.tsv file then add them
                            num_sources=addGitAnnexSources(obj=json_entity.get_uuid(),filepath=data_dict,bids_root=directory)
                            # else just add the local path to the dataset
                            if num_sources == 0:
                                json_entity.add_attributes({Constants.PROV['Location']:"file:/" + data_dict})

                            #connect json_entity with acq_entity
                            acq_entity.add_attributes({Constants.PROV["wasInfluencedBy"]:json_entity.get_uuid()})


    return project, cde
Example #10
def create_pipeline_SS_TV(bids_dir,
                          work_dir,
                          out_dir,
                          subjects,
                          sessions,
                          mag_match_pattern,
                          phase_match_pattern,
                          mask_match_pattern,
                          keep_unnecessary_outputs,
                          FAST_bias_iters,
                          FAST_bias_lowpass,
                          FAST_num_classes,
                          skip_fast,
                          brain_extract_method,
                          BET_frac,
                          single_subject_custom_mask,
                          freq_weights__snr_window_sz,
                          truncate_echo,
                          SS_TV_lagrange_parameter,
                          B0_dir,
                          scnd_diff_reliability_thresh_noise,
                          trim_radius_sz,
                          scnd_diff_reliability_thresh_trim,
                          skip_qsm,
                          skip_r2star,
                          matlab_executable,
                          mcr_location,
                          run_mode):
    layout = BIDSLayout(bids_dir)

    ### CREATE PIPELINE OBJECT
    pipelineDir = work_dir
    wf = pe.Workflow(name="SS_TV")
    wf.base_dir = pipelineDir
    wf.config['execution']['remove_unnecessary_outputs'] = not keep_unnecessary_outputs

    ### GET MULTI-ECHO DATA
    # can we do this more elegantly?
    first_echo_files = []
    for subject in subjects:
        if layout.get_sessions(subject=subject) == []:
            if sessions == ['.*']:
                first_echo_files = first_echo_files + layout.get(subject=subject, modality='anat',
                                                                 extensions='.*part-phase.*echo-0*1.*.nii.*', )
            else:
                print(
                    "Warning: Session filter applied, but subject " + subject + " has no bids session information. This subject has been ignored.")
        else:
            for session in sessions:
                first_echo_files = first_echo_files + layout.get(subject=subject, session=session, modality='anat',
                                                                 extensions='.*part-phase.*echo-0*1.*.nii.*', )
    anat_folders = []
    for img in first_echo_files:
        full_dirname = os.path.dirname(img.filename)
        remove_base_dir = full_dirname.replace(bids_dir, '')
        remove_leading_slash = remove_base_dir.lstrip(os.sep)
        anat_folders.append(remove_leading_slash)

    anat_folders = list(set(anat_folders))
    anat_folders.sort()

    # IdentityInterface is useful for passing subject directory structure to datasink
    infosource = pe.Node(niu.IdentityInterface(fields=['subject_id']), name="infosource")
    infosource.iterables = ('subject_id', anat_folders)

    ### NODES AND PARAMETERS
    if brain_extract_method == BrainExtractMethod.BIDS:
        datasource = pe.Node(
            nio.DataGrabber(infields=['subject_id'],
                            outfields=['phase_images', 'mag_images', 'phase_jsons', 'mag_jsons', 'brain_mask']),
            name='datasource')
        datasource.inputs.field_template = dict(
            phase_images='%s/' + phase_match_pattern + '.nii*',
            phase_jsons='%s/' + phase_match_pattern + '.json',
            mag_images='%s/' + mag_match_pattern + '.nii*',
            mag_jsons='%s/' + mag_match_pattern + '.json',
            brain_mask='%s/' + mask_match_pattern + '.nii*',
        )
    else:
        datasource = pe.Node(
            nio.DataGrabber(infields=['subject_id'],
                            outfields=['phase_images', 'mag_images', 'phase_jsons', 'mag_jsons']),
            name='datasource')
        datasource.inputs.field_template = dict(
            phase_images='%s/' + phase_match_pattern + '.nii*',
            phase_jsons='%s/' + phase_match_pattern + '.json',
            mag_images='%s/' + mag_match_pattern + '.nii*',
            mag_jsons='%s/' + mag_match_pattern + '.json',
        )
    datasource.inputs.sort_filelist = True
    datasource.inputs.template = "*"
    datasource.inputs.base_directory = bids_dir

    # this node must change depending on the scanner vendor
    susc_phase_preprocess = pe.Node(SiemensPhasePreprocess(), name='susc_phase_preprocess')

    avg_and_freq_estimate_weights = pe.Node(GetAvgAndSNRMap(), name='avg_and_freq_estimate_weights')
    avg_and_freq_estimate_weights.inputs.snr_window_sz = freq_weights__snr_window_sz
    avg_and_freq_estimate_weights.inputs.avg_out_filename = "avg.nii.gz"
    avg_and_freq_estimate_weights.inputs.snr_map_out_filename = "weights.nii.gz"

    wf.connect([
        (infosource, datasource, [('subject_id', 'subject_id')]),
        (datasource, avg_and_freq_estimate_weights, [('mag_images', 'mag')]),
        (datasource, susc_phase_preprocess, [('phase_images', 'infiles')])
    ])

    if brain_extract_method == BrainExtractMethod.BET:
        brain_extract = pe.Node(fsl.BET(), name='brain_extract_bet')
        brain_extract.inputs.frac = BET_frac
        brain_extract.inputs.mask = True
        brain_extract.inputs.robust = True

        if skip_fast:
            # connect avg directly to bet (skip FAST if image uniform enough for brain extraction)
            wf.connect([
                (avg_and_freq_estimate_weights, brain_extract, [('avg_out_filename', 'in_file')])
            ])
        else:
            # connect avg to nu correction, connect nu correction to bet
            """
            #spm worked better for varian 7T data
            # if using spm, these parameters are needed
            bias_regularization=.001
            sampling_distance=2.0
            bias_fwhm=30

            nonuniformityCorrect_spm=pe.Node(spm.preprocess.Segment(),name='nonuniformityCorrect_spm')
            nonuniformityCorrect_spm.inputs.bias_regularization=bias_regularization
            nonuniformityCorrect_spm.inputs.sampling_distance=sampling_distance
            nonuniformityCorrect_spm.inputs.bias_fwhm=bias_fwhm
            nonuniformityCorrect_spm.inputs.save_bias_corrected=True
            """
            nonuniformity_correct_fsl = pe.Node(fsl.FAST(), name='nonuniformity_correct_fsl')
            nonuniformity_correct_fsl.inputs.img_type = 2  # 1 for t1, 2 for t2
            nonuniformity_correct_fsl.inputs.bias_iters = FAST_bias_iters  # higher for larger nonuniformity
            nonuniformity_correct_fsl.inputs.bias_lowpass = FAST_bias_lowpass  # spm uses 30
            nonuniformity_correct_fsl.inputs.number_classes = FAST_num_classes  # spm uses 5
            nonuniformity_correct_fsl.inputs.output_biasfield = True
            nonuniformity_correct_fsl.inputs.output_biascorrected = True
            nonuniformity_correct_fsl.interface.estimated_memory_gb = 10

            wf.connect([
                # spm requires matlab
                # (avg_and_freq_estimate_weights, nonuniformityCorrect_spm, [('avgOutFilename', 'data')]),
                # (nonuniformityCorrect_spm, brain_extract, [('bias_corrected_image', 'in_file')]),
                (avg_and_freq_estimate_weights, nonuniformity_correct_fsl, [('avg_out_filename', 'in_files')]),
                (nonuniformity_correct_fsl, brain_extract, [('restored_image', 'in_file')])
            ])
    elif brain_extract_method == BrainExtractMethod.BIDS:
        brain_extract = pe.Node(
            nio.DataGrabber(infields=['subject_id'],
                            outfields=['mask_file']),
            name='bids_brain_mask')
        brain_extract.inputs.field_template = dict(
            mask_file='%s/' + mask_match_pattern + '.nii*',
        )
        brain_extract.inputs.sort_filelist = False
        brain_extract.inputs.template = "*"
        brain_extract.inputs.base_directory = bids_dir
        wf.connect([
            (infosource, brain_extract, [('subject_id', 'subject_id')]),
        ])

    elif brain_extract_method == BrainExtractMethod.SINGLE_SUBJECT_FULL_PATH:
        brain_extract = pe.Node(niu.IdentityInterface(fields=['mask_file']), name="fullpath_brain_mask")
        brain_extract.inputs.mask_file = single_subject_custom_mask

    freq_est = pe.Node(EstimateFrequencyFromWrappedPhase(), 'freq_est')
    freq_est.inputs.truncate_echo = truncate_echo
    freq_est.inputs.freq_filename = "freq_est.nii.gz"
    freq_est.interface.estimated_memory_gb = 4

    fieldmap_reorient = pe.Node(fsl.Reorient2Std(), name='fieldmap_reorient')

    datasink = pe.Node(nio.DataSink(), name="datasink")
    datasink.inputs.base_directory = out_dir + '/qsm_sstv/'
    datasink.inputs.parameterization = False

    rename_infosource = pe.Node(replace_slash, "rename_infosource")
    rename_fieldmap = pe.Node(niu.Rename(format_string="%(subject_id)s-fieldmap", keep_ext=True), "rename_fieldmap")

    wf.connect([
        (susc_phase_preprocess, freq_est, [('outfiles', 'phase')]),
        (datasource, freq_est, [('phase_jsons', 'json')]),
        (brain_extract, freq_est, [('mask_file', 'mask')]),
        (avg_and_freq_estimate_weights, freq_est, [('snr_map_out_filename', 'weight')]),
        (freq_est, fieldmap_reorient, [('freq_filename', 'in_file')]),
        # rename files and data sink
        (infosource, rename_infosource, [('subject_id', 'filename')]),
        # fieldmap
        (rename_infosource, rename_fieldmap, [('renamed', 'subject_id')]),
        (fieldmap_reorient, rename_fieldmap, [('out_file', 'in_file')]),
        (rename_fieldmap, datasink, [('out_file', '@')]),
        (infosource, datasink, [('subject_id', 'container')]),
    ])

    if not (skip_qsm and skip_r2star):
        trim_mask = pe.Node(TrimMaskUsingReliability(), name='trim_mask')
        trim_mask.inputs.erosion_sz = trim_radius_sz  # in mm
        trim_mask.inputs.threshold = scnd_diff_reliability_thresh_trim
        trim_mask.inputs.trimmed_mask_filename = "trim_mask.nii.gz"
        trim_mask.inputs.reliability_filename = "unreliableMap.nii.gz"
        trim_mask.interface.estimated_memory_gb = 25

        wf.connect([
            (freq_est, trim_mask, [('freq_filename', 'phase')]),
            (brain_extract, trim_mask, [('mask_file', 'mask')])
        ])

    if not skip_qsm:
        unreliable_fieldmap_voxels = pe.Node(CalculateReliabilityMask(), name='unreliable_fieldmap_voxels')
        unreliable_fieldmap_voxels.inputs.threshold = scnd_diff_reliability_thresh_noise
        unreliable_fieldmap_voxels.inputs.reliability_mask_filename = "unreliableMask.nii.gz"
        unreliable_fieldmap_voxels.inputs.reliability_filename = "unreliableMap.nii.gz"

        CF_value = pe.Node(GetCFFromJson, name='CFValue')

        susceptibility = pe.Node(SS_TV(run_mode, matlab_executable, mcr_location), name='susceptibility')
        susceptibility.inputs.alpha = SS_TV_lagrange_parameter
        susceptibility.inputs.B0_dir = B0_dir
        susceptibility.inputs.susceptibility_filename = 'susceptibilityMap.nii.gz'
        susceptibility.interface.estimated_memory_gb = 10

        QSM_reorient = pe.Node(fsl.Reorient2Std(), name='QSM_reorient')
        QSM_brain_mask_reorient = pe.Node(fsl.Reorient2Std(), name='QSM_brain_mask_reorient')
        QSM_noise_mask_reorient = pe.Node(fsl.Reorient2Std(), name='QSM_noise_mask_reorient')

        rename_QSM = pe.Node(niu.Rename(format_string="%(subject_id)s-QSM", keep_ext=True), "rename_QSM")
        rename_QSM_brain_mask = pe.Node(niu.Rename(format_string="%(subject_id)s-QSM_brainMask", keep_ext=True),
                                        "rename_QSM_brain_mask")
        rename_QSM_noise_mask = pe.Node(niu.Rename(format_string="%(subject_id)s-QSM_noiseMask", keep_ext=True),
                                        "rename_QSM_noise_mask")
        wf.connect([
            (freq_est, unreliable_fieldmap_voxels, [('freq_filename', 'phase')]),
            (brain_extract, unreliable_fieldmap_voxels, [('mask_file', 'mask')]),
            (freq_est, susceptibility, [('freq_filename', 'freq_loc')]),
            (datasource, CF_value, [('mag_jsons', 'filename')]),
            (unreliable_fieldmap_voxels, susceptibility, [('reliability_mask_filename', 'reliability_mask_loc')]),
            (trim_mask, susceptibility, [('trimmed_mask_filename', 'mask_loc')]),
            (CF_value, susceptibility, [('CF_value', 'CF')]),

            (susceptibility, QSM_reorient, [('susceptibility_filename', 'in_file')]),
            (trim_mask, QSM_brain_mask_reorient, [('trimmed_mask_filename', 'in_file')]),
            (unreliable_fieldmap_voxels, QSM_noise_mask_reorient, [('reliability_mask_filename', 'in_file')]),

            # qsm
            (rename_infosource, rename_QSM, [('renamed', 'subject_id')]),
            (QSM_reorient, rename_QSM, [('out_file', 'in_file')]),
            (rename_QSM, datasink, [('out_file', '@.@qsm')]),
            # qsm brain mask
            (rename_infosource, rename_QSM_brain_mask, [('renamed', 'subject_id')]),
            (QSM_brain_mask_reorient, rename_QSM_brain_mask, [('out_file', 'in_file')]),
            (rename_QSM_brain_mask, datasink, [('out_file', '@.@qsm_brain')]),
            # qsm noisy voxels in fieldmap
            (rename_infosource, rename_QSM_noise_mask, [('renamed', 'subject_id')]),
            (QSM_noise_mask_reorient, rename_QSM_noise_mask, [('out_file', 'in_file')]),
            (rename_QSM_noise_mask, datasink, [('out_file', '@.@qsm_noise')]),
        ])

    if not skip_r2star:
        R2Star = pe.Node(CalcR2Star(), 'R2Star')
        R2Star.inputs.R2star = 'R2star.nii.gz'
        R2Star.inputs.neg_mask = 'negMask.nii.gz'
        R2Star.inputs.nan_mask = 'nanMask.nii.gz'
        # R2Star.interface.estimated_memory_gb = 5

        R2star_reorient = pe.Node(fsl.Reorient2Std(), name='R2star_reorient')
        R2star_fit_reorient = pe.Node(fsl.Reorient2Std(), name='R2star_fit_reorient')
        R2star_neg_mask_reorient = pe.Node(fsl.Reorient2Std(), name='R2star_neg_mask_reorient')

        rename_R2star = pe.Node(niu.Rename(format_string="%(subject_id)s-R2star", keep_ext=True), "rename_R2star")
        rename_R2star_fit = pe.Node(niu.Rename(format_string="%(subject_id)s-R2star_fit", keep_ext=True),
                                    "rename_R2star_fit")
        rename_R2star_neg_mask = pe.Node(niu.Rename(format_string="%(subject_id)s-R2star_negMask", keep_ext=True),
                                         "rename_R2star_neg_mask")

        wf.connect([
            (datasource, R2Star, [('mag_images', 'mag')]),
            (susc_phase_preprocess, R2Star, [('outfiles', 'phase')]),
            (freq_est, R2Star, [('freq_filename', 'freq_loc')]),
            (trim_mask, R2Star, [('trimmed_mask_filename', 'mask')]),
            (datasource, R2Star, [('mag_jsons', 'json')]),
            (R2Star, R2star_reorient, [('R2star', 'in_file')]),
            (R2Star, R2star_fit_reorient, [('R2star_fit', 'in_file')]),
            (R2Star, R2star_neg_mask_reorient, [('neg_mask', 'in_file')]),
            # r2star
            (rename_infosource, rename_R2star, [('renamed', 'subject_id')]),
            (R2star_reorient, rename_R2star, [('out_file', 'in_file')]),
            (rename_R2star, datasink, [('out_file', '@.@r2star')]),
            # r2star fit map
            (rename_infosource, rename_R2star_fit, [('renamed', 'subject_id')]),
            (R2star_fit_reorient, rename_R2star_fit, [('out_file', 'in_file')]),
            (rename_R2star_fit, datasink, [('out_file', '@.@r2starfit')]),
            # r2star negative values that were set to 0
            (rename_infosource, rename_R2star_neg_mask, [('renamed', 'subject_id')]),
            (R2star_neg_mask_reorient, rename_R2star_neg_mask, [('out_file', 'in_file')]),
            (rename_R2star_neg_mask, datasink, [('out_file', '@.@r2starneg')]),
        ])

    return wf
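# Hedged usage sketch (not part of the snippet above): `wf` stands for the
# workflow returned by this factory; the factory's name and arguments are not
# shown here. A caller could run it with nipype's MultiProc plugin, e.g.:
wf.write_graph(graph2use='colored', format='png')   # optional DAG overview
wf.run(plugin='MultiProc', plugin_args={'n_procs': 4})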
Example No. 11
        wf_dir = args.wf_base_dir

    if args.ants_reg_quick:
        print("Use AntsRegistrationSynQuick for registration")
    else:
        print("Use AntsRegistrationSyn for registration")

    layout = BIDSLayout(args.bids_dir)
    if not subjects:
        subjects = layout.get_subjects(datatype="dwi")
    print(f"{len(subjects)} subject(s) found {subjects}")

    for subject in subjects:
        print(subject)
        # get sessions
        sessions = layout.get_sessions(subject=subject, datatype="dwi")
        sessions.sort()

        # set up acq for eddy
        if "lhab" in subject:
            acq_str = "0 1 0 {TotalReadoutTime}"
            study = "lhab"
        elif "CC" in subject:
            acq_str = "0 -1 0 0.0684"
            study = "camcan"
        elif "olm" in subject:
            acq_str = "0 1 0 {TotalReadoutTime}"
            study = "olm"
        else:
            raise ("Cannot determine study")
        wfs = []
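# Hedged sketch (the helper name and paths are illustrative, not from this
# snippet): the "{TotalReadoutTime}" placeholder in acq_str is typically filled
# from the DWI JSON sidecar and written as one acqparams.txt line for FSL
# topup/eddy.
import json

def write_acqparams(sidecar_json, acq_str, out_path="acqparams.txt"):
    """Fill TotalReadoutTime from the sidecar and write one acqparams line."""
    with open(sidecar_json) as f:
        meta = json.load(f)
    line = acq_str.format(TotalReadoutTime=meta["TotalReadoutTime"])
    with open(out_path, "w") as f:
        f.write(line + "\n")
    return out_path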
Example No. 12

    struct_params['acquisition'] = args['t1_acquisition']
elif args['fmri_acquisition'] is not None:
    time_series_params['acquisition'] = args['fmri_acquisition']
else:
    data_grabber_node_iterables.append(
        ('acquisition', layout.get_acquisitions()))

if args['session'] is not None:
    struct_params['session'] = args['session']
    time_series_params['session'] = args['session']
elif args['t1_session'] is not None:
    struct_params['session'] = args["t1_session"]
elif args['fmri_session'] is not None:
    time_series_params['session'] = args['fmri_session']
else:
    data_grabber_node_iterables.append(('session', layout.get_sessions()))

if args['t1_temp'] is not None:
    rcfe_setup.config['registration'] = rcfe_setup.Reg.t1
if args['epi_temp'] is not None:
    rcfe_setup.config['registration'] = rcfe_setup.Reg.epi
if args['results_dir'] is not None:
    rcfe_setup.config['results_directory'] = args['results_dir']
if args['draw_graphs'] == 0:
    rcfe_setup.config['graphs'] = False
if args['bias_correction'] == 0:
    rcfe_setup.config['bias_correction'] = False

from rcfe_pipeline_setup import full_process
from rcfe_pipeline_setup import input_handler_node
from rcfe_pipeline_setup import accept_input
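# Hedged sketch (the node below is illustrative, not taken from
# rcfe_pipeline_setup): iterables collected in data_grabber_node_iterables are
# typically attached to an identity node so Nipype expands the workflow over
# every acquisition/session value found in the layout.
import nipype.pipeline.engine as pe
import nipype.interfaces.utility as niu

data_grabber_params = pe.Node(
    niu.IdentityInterface(fields=['acquisition', 'session']),
    name='data_grabber_params')
data_grabber_params.iterables = data_grabber_node_iterables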
Example No. 13
def get_files(subject_id,
              session,
              task,
              raw_data_dir,
              preprocessed_data_dir,
              space=None,
              run=[],
              strict=True,
              **kwargs):
    """
    Given some information, retrieve all the files and metadata from a
    BIDS-formatted dataset that will be passed to the analysis pipeline.
    """
    from bids import BIDSLayout

    # only the raw files have the correct metadata (e.g., TR), and the event files are here
    raw_layout = BIDSLayout(raw_data_dir, validate=False, derivatives=False)
    preproc_layout = BIDSLayout(preprocessed_data_dir, validate=False)

    subjects = preproc_layout.get_subjects()
    assert subject_id in subjects and subject_id in raw_layout.get_subjects(), \
        "Subject not found!"

    sessions = preproc_layout.get_sessions()
    assert session in sessions, "Session not found!"

    tasks = preproc_layout.get_tasks()
    assert task in tasks, "Task not found!"

    if space == "None":
        space = None

    if space is None:
        print("Space is None")
        bolds = sorted([
            f for f in preproc_layout.get(subject=subject_id,
                                          session=session,
                                          task=task,
                                          run=run,
                                          suffix='bold',
                                          extension=['nii.gz'],
                                          return_type='file')
        ])
    else:
        bolds = sorted([
            f for f in preproc_layout.get(subject=subject_id,
                                          session=session,
                                          task=task,
                                          run=run,
                                          suffix='bold',
                                          extension=['nii.gz'],
                                          return_type='file')
            if f"space-{space}" in f
        ])
    print(f"BOLDS: {len(bolds)}\n{bolds}")
    if space is None:
        masks = sorted([
            f for f in preproc_layout.get(subject=subject_id,
                                          suffix='mask',
                                          session=session,
                                          task=task,
                                          extension=['nii.gz'],
                                          return_type='file')
        ])
        if not masks:
            masks = sorted([
                f for f in preproc_layout.get(subject=subject_id,
                                              suffix='mask',
                                              session=session,
                                              extension=['nii.gz'],
                                              return_type='file')
            ])
    else:
        masks = sorted([
            f for f in preproc_layout.get(subject=subject_id,
                                          suffix='mask',
                                          session=session,
                                          task=task,
                                          extension=['nii.gz'],
                                          return_type='file')
            if f"space-{space}" in f
        ])
        if not masks:
            masks = sorted([
                f for f in preproc_layout.get(subject=subject_id,
                                              suffix='mask',
                                              session=session,
                                              extension=['nii.gz'],
                                              return_type='file')
                if f"space-{space}" in f
            ])
    if len(masks) == 1:
        # there is only one mask and it is to be used for all runs
        masks = masks * len(bolds)
    print(f"Masks: {len(masks)}\n{masks}")
    eventfiles = sorted(
        raw_layout.get(subject=subject_id,
                       suffix='events',
                       task=task,
                       session=session,
                       run=run,
                       extension=['tsv'],
                       return_type='file'))
    print(f"Eventfiles: {len(eventfiles)}\n{eventfiles}")
    raw_bolds = sorted(
        raw_layout.get(subject=subject_id,
                       suffix='bold',
                       task=task,
                       session=session,
                       run=run,
                       extension=['nii.gz'],
                       return_type='file'))
    TRs = [raw_layout.get_metadata(f)['RepetitionTime'] for f in raw_bolds]
    print(TRs, len(TRs))
    confounds = sorted(
        preproc_layout.get(subject=subject_id,
                           suffix="regressors",
                           task=task,
                           session=session,
                           run=run,
                           extension=['tsv'],
                           return_type='file'))
    print(f"Confounds: {len(confounds)}\n{confounds}")
    if not confounds:
        confounds = [''] * len(bolds)
    #print(list(zip(bolds, masks, eventfiles, TRs)))
    # edit 11/9/18 - remove assert on event files, since some early hemifield scans don't have it
    # but warn!
    if (len(eventfiles) != len(bolds)):
        print("Some functional runs do not have corresponding event files!")
    # all runs for a particular task must have the same TR
    assert TRs.count(TRs[0]) == len(TRs), "Not all TRs are the same!"
    if strict:
        # used to also check for == len(confounds)
        assert len(bolds) == len(masks) > 0, "Input lists are not the same length!"
    TR = TRs[0]
    return bolds, masks, eventfiles, TR, confounds
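# Hedged usage sketch: the paths and entity labels below are placeholders, not
# values taken from this snippet.
bolds, masks, eventfiles, TR, confounds = get_files(
    subject_id='01',
    session='01',
    task='rest',
    raw_data_dir='/data/bids',
    preprocessed_data_dir='/data/fmriprep/derivatives',
    space='MNI152NLin2009cAsym',
    run=[1, 2])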
Example No. 14
"""
Created on Wed Mar  6 09:43:43 2019

@author: Or Duek
A small script that removes (MB4iPAT2) from filenames.
This script has been replaced and merged into creatBIDS.py.

"""

import os
from bids import BIDSLayout

data_dir = '/media/Data/rcfTest'
#data_dir = '/media/Data/kpe_forFmriPrep'

layout = BIDSLayout(data_dir)
layout.get_sessions()
# the loop below iterates over this list of files as source_epi
source_epi = layout.get()  # e.g. restrict with extension='nii.gz' if needed

# maybe we need to change the way we look for all bold files with these parentheses

for i in source_epi:
    a = i.filename
    # print (i.filename)
    if a.find('(MB4iPAT2)') != -1:
        print("We have found an issue with ", a)
        b = a.split(
            '(MB4iPAT2)'
        )  # this is the part that will be omitted from the file name. If you have an extra - you should add that too.
        c = b[0] + b[1]  # combine together
        #change filename
        os.rename(a, c)
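# Hedged illustration of the split-and-rejoin above on a made-up filename;
# a single str.replace call would achieve the same result.
example = 'sub-01_task-rest(MB4iPAT2)_bold.nii.gz'
parts = example.split('(MB4iPAT2)')
assert parts[0] + parts[1] == example.replace('(MB4iPAT2)', '')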
Example No. 15
    if args.freesurfer_dir:
        freesurfer_dir = args.freesurfer_dir
    else:
        freesurfer_dir = os.path.join(args.out_dir, "freesurfer")
    out_dir = os.path.join(args.out_dir, "baracus")
    if not os.path.isdir(freesurfer_dir):
        os.makedirs(freesurfer_dir)
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    model_dir = resource_filename(Requirement.parse("baracus"), 'models')

    run("bids-validator " + args.bids_dir)
    layout = BIDSLayout(args.bids_dir)

    truly_longitudinal_study = len(layout.get_sessions()) > 1
    subjects_to_analyze, sessions_to_analyze, freesurfer_subjects_to_analyze = get_subjects_session(layout,
                                                                                                    args.participant_label,
                                                                                                    truly_longitudinal_study)

    if args.analysis_level == "participant":

        data_files = run_prepare_all(args.bids_dir, freesurfer_dir, out_dir, subjects_to_analyze,
                                     sessions_to_analyze, args.n_cpus, args.license_key, args.skip_missing)

### REGRESS OUT SITE HERE ###
        if args.confound_file:
            confound_file = args.confound_file
            remove_confounds(data_files, confound_file)
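# Hedged sketch of the confound-removal idea (remove_confounds itself is not
# shown in this snippet): residualize each feature column against the confound
# design matrix with ordinary least squares. Names are illustrative.
import numpy as np

def regress_out(features, confounds):
    """Return features with the best linear fit to the confounds removed."""
    X = np.column_stack([np.ones(len(confounds)), np.asarray(confounds)])
    beta, *_ = np.linalg.lstsq(X, features, rcond=None)
    return features - X @ beta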
Example No. 16
def generate_inputs(
    bids_dir,
    pybids_inputs,
    derivatives=False,
    pybids_config=None,
    search_terms=None,
    limit_to=None,
    participant_label=None,
    exclude_participant_label=None,
):
    """Dynamically generate snakemake inputs using pybids_inputs dict, and
    pybids to parse the bids dataset.

    Parameters
    ----------
    bids_dir : str
        Path to bids directory

    pybids_inputs : dict
        Configuration for bids inputs, with keys as the names (``str``)

        Nested `dicts` with the following required keys:

        * ``"filters"``: Dictionary containing keyword arguments that will
          be passed to pybids ``get()``.

        * ``"wildcards"``: List of (str) bids tags to include as wildcards in
          snakemake. At minimum this should usually include
          ``['subject','session']``, plus any other wildcards that you may
          want to make use of in your snakemake workflow, or want to retain
          in the output paths. Any wildcards in this list that are not in the
          filename will just be ignored.

    Returns
    -------
    dict:
        The dict returned by this function contains seven items. Each of
        the following four items is a dict containing one item for each
        modality described by ``pybids_inputs``.

        * ``"input_path"``: String with a wildcard-filled path that matches
          the images for this modality.

        * ``"input_zip_lists"``: Dictionary where each key is a wildcard
          entity and each value is a list of the values found for that
          entity. Each of these lists has length equal to the number of
          images matched for this modality, so they can be zipped together to
          get a list of the wildcard values for each image.

        * ``"input_lists"``: Dictionary where each key is a wildcard entity
          and each value is a list of the unique values found for that
          entity. These lists may not be the same length.

        * ``"input_wildcards"``: Dictionary where each key is the name of a
          wildcard entity, and each value is the Snakemake wildcard used for
          that entity.

        Then there are three more top-level entries in the dictionary:

        * ``"subjects"``: A list of the subjects in the dataset.

        * ``"sessions"``: A list of the sessions in the dataset.

        * ``"subj_wildcards"``: The subject and session wildcards applicable
          to this dataset. ``{"subject": "{subject}"}`` if there is only one
          session, ``{"subject": "{subject}", "session": "{session}"}`` if
          there are multiple sessions.

    Notes
    -----
    As an example, consider the following BIDS dataset::

        bids-example/
        ├── dataset_description.json
        ├── participants.tsv
        ├── README
        └── sub-control01
            ├── anat
            │   ├── sub-control01_T1w.json
            │   ├── sub-control01_T1w.nii.gz
            │   ├── sub-control01_T2w.json
            │   └── sub-control01_T2w.nii.gz
            ├── dwi
            │   ├── sub-control01_dwi.bval
            │   ├── sub-control01_dwi.bvec
            │   └── sub-control01_dwi.nii.gz
            ├── fmap
            │   ├── sub-control01_magnitude1.nii.gz
            │   ├── sub-control01_phasediff.json
            │   ├── sub-control01_phasediff.nii.gz
            │   └── sub-control01_scans.tsv
            └── func
                ├── sub-control01_task-nback_bold.json
                ├── sub-control01_task-nback_bold.nii.gz
                ├── sub-control01_task-nback_events.tsv
                ├── sub-control01_task-nback_physio.json
                ├── sub-control01_task-nback_physio.tsv.gz
                ├── sub-control01_task-nback_sbref.nii.gz
                ├── sub-control01_task-rest_bold.json
                ├── sub-control01_task-rest_bold.nii.gz
                ├── sub-control01_task-rest_physio.json
                └── sub-control01_task-rest_physio.tsv.gz

    With the following ``pybids_inputs`` defined in the config file::

        pybids_inputs:
          bold:
            filters:
              suffix: 'bold'
              extension: '.nii.gz'
              datatype: 'func'
            wildcards:
              - subject
              - session
              - acquisition
              - task
              - run

    Then ``generate_inputs(bids_dir, pybids_input)`` would return the
    following dictionary::

        {
            "input_path": {
                "bold": "bids-example/sub-{subject}/func/sub-{subject}_task-{task}_bold.nii.gz"
            },
            "input_zip_lists": {
                "bold": {
                    "subject": ["control01", "control01"],
                    "task": ["nback", "rest"]
                }
            },
            "input_lists": {
                "bold": {
                    "subject": ["control01"],
                    "task": ["nback", "rest"]
                }
            },
            "input_wildcards": {
                "bold": {
                    "subject": "{subject}",
                    "task": "{task}"
                }
            },
            "subjects": ["subject01"],
            "sessions": [],
            "subj_wildcards": {"subject": "{subject}"}
        }
    """  # noqa

    search_terms = _generate_search_terms(participant_label, exclude_participant_label)

    if os.path.exists(bids_dir):
        # generate inputs based on config
        layout = BIDSLayout(
            bids_dir,
            derivatives=derivatives,
            validate=False,
            # In the next version of pybids, config will accept Paths, so we won't
            # need this long stringify line
            config=str(pybids_config) if pybids_config is not None else pybids_config,
            indexer=BIDSLayoutIndexer(validate=False, index_metadata=False),
        )
    else:
        _logger.info(
            "bids_dir does not exist, skipping PyBIDS and using "
            "custom file paths only"
        )
        layout = None

    # this will populate input_path, input_lists, input_zip_lists, and
    # input_wildcards
    inputs_config_dict = _get_lists_from_bids(
        bids_layout=layout,
        pybids_inputs=pybids_inputs,
        limit_to=limit_to,
        **search_terms,
    )

    if layout is None:
        # if no layout, then use subjects/sessions from --path vars
        subjects = []
        sessions = []
        for input_type in inputs_config_dict["input_lists"]:

            subj_set = set(inputs_config_dict["input_lists"][input_type]["subject"])

            # filter the list of subjects with participant_label
            if participant_label is not None:
                subj_set = set.intersection(subj_set, set(participant_label))

            # TODO: need to also remove subjects based on exclude_participant_label

            # replace with filtered list
            inputs_config_dict["input_lists"][input_type]["subject"] = list(subj_set)

            # add to set of subjects from all input_types
            subjects.append(subj_set)

            if "session" in (inputs_config_dict["input_lists"][input_type].keys()):
                sessions.append(
                    {inputs_config_dict["input_lists"][input_type]["session"]}
                )
            else:
                sessions.append(set([]))

        # take set intersection of all input types
        inputs_config_dict["subjects"] = list(set.intersection(*subjects))
        inputs_config_dict["sessions"] = list(set.intersection(*sessions))

    else:
        # populate subjects, sessions and subj_wildcards in the config
        inputs_config_dict["subjects"] = layout.get_subjects(**search_terms)
        inputs_config_dict["sessions"] = layout.get_sessions(**search_terms)

    if len(inputs_config_dict["sessions"]) == 0:
        inputs_config_dict["subj_wildcards"] = {"subject": "{subject}"}
    else:
        inputs_config_dict["subj_wildcards"] = {
            "subject": "{subject}",
            "session": "{session}",
        }

    return inputs_config_dict
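# Hedged usage sketch mirroring the docstring example above; the pybids_inputs
# dict is the Python equivalent of the YAML config shown there.
pybids_inputs = {
    "bold": {
        "filters": {"suffix": "bold", "extension": ".nii.gz", "datatype": "func"},
        "wildcards": ["subject", "session", "acquisition", "task", "run"],
    }
}
config = generate_inputs(bids_dir="bids-example", pybids_inputs=pybids_inputs)
print(config["subjects"])
print(config["input_wildcards"]["bold"])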