def anon_acqtimes(dset_dir):
    """
    Anonymize acquisition datetimes for a dataset.

    Works for both longitudinal and cross-sectional studies. The time of day
    is preserved, but the first scan is set to January 1st, 1800. In a
    longitudinal study, each session is anonymized relative to the first
    session, so that time between sessions is preserved.
    Overwrites scan tsv files in dataset. Only run this *after* data
    collection is complete for the study, especially if it's longitudinal.

    Parameters
    ----------
    dset_dir : str
        Path to BIDS dataset to be anonymized.
    """
    bl_dt = parser.parse('1800-01-01')

    layout = BIDSLayout(dset_dir)
    subjects = layout.get_subjects()
    sessions = sorted(layout.get_sessions())

    for sub in subjects:
        if not sessions:
            scans_file = op.join(dset_dir, f'sub-{sub}/sub-{sub}_scans.tsv')
            df = pd.read_csv(scans_file, sep='\t')
            first_scan = df['acq_time'].min()
            first_dt = parser.parse(first_scan.split('T')[0])
            diff = first_dt - bl_dt
            acq_times = df['acq_time'].apply(parser.parse)
            acq_times = (acq_times - diff).astype(str)
            df['acq_time'] = acq_times
            # df.to_csv(scans_file, sep='\t', index=False)
        else:
            # Separated from dataset sessions in case subject missed some
            sub_ses = sorted(layout.get_sessions(subject=sub))
            for i, ses in enumerate(sub_ses):
                scans_file = op.join(
                    dset_dir,
                    f'sub-{sub}/ses-{ses}/sub-{sub}_ses-{ses}_scans.tsv')
                df = pd.read_csv(scans_file, sep='\t')
                if i == 0:
                    # Anonymize in terms of first scan for subject.
                    first_scan = df['acq_time'].min()
                    first_dt = parser.parse(first_scan.split('T')[0])
                    diff = first_dt - bl_dt
                acq_times = df['acq_time'].apply(parser.parse)
                acq_times = (acq_times - diff).astype(str)
                df['acq_time'] = acq_times
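# Hedged usage sketch for anon_acqtimes: the dataset path below is
# hypothetical, and the function assumes `op` (os.path), `pd` (pandas),
# `parser` (dateutil.parser) and BIDSLayout are imported by the module.
# All acq_time values in the *_scans.tsv files are shifted so the earliest
# scan lands on 1800-01-01; run it only after data collection is finished.
if __name__ == "__main__":
    dset_dir = '/data/bids_dataset'  # hypothetical BIDS root
    anon_acqtimes(dset_dir)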
def read_BIDS_coordinates(BIDS_path):
    """Read electrode coordinate arrays for every subject in a BIDS dataset.

    Args:
        BIDS_path (string): absolute BIDS path

    Returns:
        coord_arr (np array): array with shape (len(subjects), 4), indexed in
            the following order: left ecog, left stn, right ecog, right stn
        coord_arr_names (np array): array with shape (len(subjects), 2), where
            coord names are saved in order: left, right
    """
    layout = BIDSLayout(BIDS_path)
    subjects = layout.get_subjects()
    sessions = layout.get_sessions()

    # left ecog, left stn, right ecog, right stn
    coord_arr = np.empty((len(subjects), 4), dtype=object)
    coord_arr_names = np.empty((len(subjects), 2), dtype=object)

    for subject_idx, subject in enumerate(subjects):
        for sess in sessions:
            coord_path = os.path.join(BIDS_path, 'sub-' + subject,
                                      'ses-' + sess, 'eeg',
                                      'sub-' + subject + '_electrodes.tsv')
            print(coord_path)
            if os.path.exists(coord_path) is False:
                continue
            df = pd.read_csv(coord_path, sep="\t")

            if sess == 'left':
                if np.array(df['name'].str.contains("ECOG")).any():
                    # [1:4] due to bipolar referencing (first electrode missing)
                    coord_arr[subject_idx, 0] = np.ndarray.astype(
                        np.array(df[df['name'].str.contains("ECOG")])[:, 1:4],
                        float)
                if np.array(df['name'].str.contains("STN")).any():
                    coord_arr[subject_idx, 1] = np.ndarray.astype(
                        np.array(df[df['name'].str.contains("STN")])[:, 1:4],
                        float)
                coord_arr_names[subject_idx, 0] = list(df['name'])
            elif sess == 'right':
                if np.array(df['name'].str.contains("ECOG")).any():
                    coord_arr[subject_idx, 2] = np.ndarray.astype(
                        np.array(df[df['name'].str.contains("ECOG")])[:, 1:4],
                        float)
                if np.array(df['name'].str.contains("STN")).any():
                    coord_arr[subject_idx, 3] = np.ndarray.astype(
                        np.array(df[df['name'].str.contains("STN")])[:, 1:4],
                        float)
                coord_arr_names[subject_idx, 1] = list(df['name'])

    return coord_arr, coord_arr_names
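# Minimal usage sketch for read_BIDS_coordinates (the BIDS root below is
# hypothetical). Each cell of coord_arr holds either None or an
# (n_channels, 3) float array of x/y/z coordinates, indexed as
# [left ECOG, left STN, right ECOG, right STN].
if __name__ == "__main__":
    coords, coord_names = read_BIDS_coordinates('/data/bids_ecog')  # hypothetical path
    for sub_idx, sub_coords in enumerate(coords):
        left_ecog = sub_coords[0]
        if left_ecog is not None:
            print(f"subject {sub_idx}: {left_ecog.shape[0]} left ECOG contacts")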
def test_dcm2bids():
    # tmpBase = os.path.join(TEST_DATA_DIR, "tmp")
    # bidsDir = TemporaryDirectory(dir=tmpBase)
    bidsDir = TemporaryDirectory()

    tmpSubDir = os.path.join(bidsDir.name, DEFAULT.tmpDirName, "sub-01")
    shutil.copytree(os.path.join(TEST_DATA_DIR, "sidecars"), tmpSubDir)

    app = Dcm2bids(
        [TEST_DATA_DIR],
        "01",
        os.path.join(TEST_DATA_DIR, "config_test.json"),
        bidsDir.name,
    )
    app.run()

    layout = BIDSLayout(bidsDir.name, validate=False)

    assert layout.get_subjects() == ["01"]
    assert layout.get_sessions() == []
    assert layout.get_tasks() == ["rest"]
    assert layout.get_runs() == [1, 2, 3]

    app = Dcm2bids(
        [TEST_DATA_DIR],
        "01",
        os.path.join(TEST_DATA_DIR, "config_test.json"),
        bidsDir.name,
    )
    app.run()

    fmapFile = os.path.join(bidsDir.name, "sub-01", "fmap",
                            "sub-01_echo-492_fmap.json")
    data = load_json(fmapFile)
    fmapMtime = os.stat(fmapFile).st_mtime
    assert data["IntendedFor"] == "dwi/sub-01_dwi.nii.gz"

    data = load_json(
        os.path.join(bidsDir.name, "sub-01", "localizer",
                     "sub-01_run-01_localizer.json"))
    assert data["ProcedureStepDescription"] == "Modify by dcm2bids"

    # rerun
    shutil.rmtree(tmpSubDir)
    shutil.copytree(os.path.join(TEST_DATA_DIR, "sidecars"), tmpSubDir)

    app = Dcm2bids(
        [TEST_DATA_DIR],
        "01",
        os.path.join(TEST_DATA_DIR, "config_test.json"),
        bidsDir.name,
    )
    app.run()

    fmapMtimeRerun = os.stat(fmapFile).st_mtime
    assert fmapMtime == fmapMtimeRerun

    if os.name != 'nt':
        bidsDir.cleanup()
def test_dcm2bids():
    tmpBase = os.path.join(TEST_DATA_DIR, "tmp")
    # bidsDir = TemporaryDirectory(dir=tmpBase)
    bidsDir = TemporaryDirectory()

    tmpSubDir = os.path.join(bidsDir.name, DEFAULT.tmpDirName, "sub-01")
    shutil.copytree(os.path.join(TEST_DATA_DIR, "sidecars"), tmpSubDir)

    app = Dcm2bids(
        [TEST_DATA_DIR], "01",
        os.path.join(TEST_DATA_DIR, "config_test.json"),
        bidsDir.name)
    app.run()

    layout = BIDSLayout(bidsDir.name, validate=False)

    assert layout.get_subjects() == ["01"]
    assert layout.get_sessions() == []
    assert layout.get_tasks() == ["rest"]
    assert layout.get_runs() == [1, 2, 3]

    app = Dcm2bids(
        [TEST_DATA_DIR], "01",
        os.path.join(TEST_DATA_DIR, "config_test.json"),
        bidsDir.name)
    app.run()

    fmapFile = os.path.join(
        bidsDir.name, "sub-01", "fmap", "sub-01_echo-492_fmap.json")
    data = load_json(fmapFile)
    fmapMtime = os.stat(fmapFile).st_mtime
    assert data["IntendedFor"] == "dwi/sub-01_dwi.nii.gz"

    data = load_json(os.path.join(
        bidsDir.name, "sub-01", "localizer", "sub-01_run-01_localizer.json"))
    assert data["ProcedureStepDescription"] == "Modify by dcm2bids"

    # rerun
    shutil.rmtree(tmpSubDir)
    shutil.copytree(os.path.join(TEST_DATA_DIR, "sidecars"), tmpSubDir)

    app = Dcm2bids(
        [TEST_DATA_DIR], "01",
        os.path.join(TEST_DATA_DIR, "config_test.json"),
        bidsDir.name)
    app.run()

    fmapMtimeRerun = os.stat(fmapFile).st_mtime
    assert fmapMtime == fmapMtimeRerun

    bidsDir.cleanup()
def _fill_empty_lists(layout: BIDSLayout, subjects: list, tasks: list,
                      sessions: list, runs: t.List[str]):
    """If filters are not provided by the user, load them from layout."""
    subjects = subjects if subjects else layout.get_subjects()
    tasks = tasks if tasks else layout.get_tasks()
    sessions = sessions if sessions else layout.get_sessions()
    runs = runs if runs else layout.get_runs()

    return subjects, tasks, sessions, runs
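# Hedged example of how _fill_empty_lists might be called: any empty filter
# list is replaced with everything pybids finds in the dataset, while
# user-supplied filters are kept. The BIDS root is hypothetical.
from bids import BIDSLayout

layout = BIDSLayout('/data/bids_dataset')  # hypothetical path
subjects, tasks, sessions, runs = _fill_empty_lists(
    layout, subjects=[], tasks=['rest'], sessions=[], runs=[])
# subjects, sessions and runs now come from the layout; tasks stays ['rest'].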
def test_generate_bids_skeleton(tmp_path, test_id, json_layout, n_files,
                                n_subjects, n_sessions):
    root = tmp_path / test_id
    generate_bids_skeleton(root, json_layout)
    datadesc = root / "dataset_description.json"
    assert datadesc.exists()
    assert "BIDSVersion" in datadesc.read_text()
    assert len([x for x in root.glob("**/*") if x.is_file()]) == n_files

    # ensure layout is valid
    layout = BIDSLayout(root)
    assert len(layout.get_subjects()) == n_subjects
    assert len(layout.get_sessions()) == n_sessions

    anat = layout.get(suffix="T1w", extension="nii.gz")[0]
    bold = layout.get(suffix="bold", extension="nii.gz")[0]
    assert anat.get_metadata()
    assert bold.get_metadata()
def main(**args): path = "/mnt/DATA_4Tera/Dati_Sherlock/bids/" derivatives = os.path.join(path, "derivatives", "afniproc") print("mkdir -p {}".format(derivatives)) os.system("mkdir -p {}".format(derivatives)) subj_deriv = os.path.join(derivatives, 'sub-{subject}', "ses-{session}") layout = BIDSLayout(path) subjects = layout.get_subjects() sessions = layout.get_sessions() for session in sessions: for subj in subjects: deriv_dir = subj_deriv.format(session=session, subject=subj) print("mkdir -p {}".format(deriv_dir)) os.system("mkdir -p {}".format(deriv_dir)) # Create anat and func anat_dir = os.path.join(subj_deriv, "{datatype}").format(session=session, subject=subj, datatype='anat') func_dir = os.path.join(subj_deriv, "{datatype}").format(session=session, subject=subj, datatype='func') print("mkdir -p {}".format(anat_dir)) os.system("mkdir -p {}".format(anat_dir)) print("mkdir -p {}".format(func_dir)) os.system("mkdir -p {}".format(func_dir)) # Check and convert T1 to send Freesurfer segmentation t1 = layout.get(subject=subj, session=session, suffix='T1w')[0] entities = t1.get_entities() pattern = os.path.join( subj_deriv, "{datatype}", "sub-{subject}[_ses-{session}][_desc-{desc}]_{suffix}.{extension}" ) entities['desc'] = 'fsprep' t1_fs = layout.build_path(entities, pattern, validate=False) entities['extension'] = 'txt' t1_log = layout.build_path(entities, pattern, validate=False) command = "check_dset_for_fs.py -input %s -fix_all -fix_out_prefix %s -fix_out_vox_dim 1 -verb > %s" command = command % (t1.path, t1_fs, t1_log) print(command) os.system(command) check_fs(layout, subj, session, subj_deriv) ### T1 to MNI space coreg entities = t1.get_entities() entities['desc'] = 'fsprep' entities['space'] = 'MNI152' pattern = os.path.join( subj_deriv, "{datatype}", "sub-{subject}[_ses-{session}][_space-{space}][_desc-{desc}]_{suffix}.{extension}" ) t1_mni = layout.build_path(entities, pattern, validate=False) command = "@auto_tlrc -base MNI152_2009_template.nii.gz -pad_base 35 -prefix {prefix} -input {input}" command = command.format(prefix=t1_mni, input=t1_fs) print(command) os.system(command) runs = layout.get_runs() ordered_bold = [] for run in runs: fname = layout.get(session=session, subject=subj, run=run, suffix='bold') if len(fname) != 0: ordered_bold.append(fname[0]) # Slice time correction - motion correction - align EPI to Anat to MNI bold = layout.get(subject=subj, session=session, suffix='bold', extension='nii.gz') # 1D File slice_timing = np.array(bold[0].get_metadata()['SliceTiming']) slice_fname = os.path.join(path, "slice_timing.txt") np.savetxt(slice_fname, slice_timing, delimiter=' ', newline=' ', fmt='%.5f') epi = layout.get(subject=subj, session=session, suffix='bold', extension='nii.gz', run=1)[0].path child_epi = " ".join([b.path for b in ordered_bold]) command = "align_epi_anat.py -anat {anat} -epi {epi} -child_epi {child_epi}"+\ " -epi_base 0 -tshift_opts -tpattern {tpattern} -epi2anat -giant_move"+\ " -tlrc_apar {tlrc_apar}" command = command.format(anat=t1_fs, epi=epi, child_epi=child_epi, tpattern=slice_fname, tlrc_apar=t1_mni) print(command) os.system(command) # Create mask automask = " ".join( [b.filename[:-7] + '_tlrc_al+tlrc.HEAD' for b in bold]) mean_mask_prefix = 'mean_mni.nii.gz' command = '3dTstat -prefix {prefix} {input}'.format( prefix=mean_mask_prefix, input=automask) print(command) os.system(command) entities = bold[0].get_entities() entities['suffix'] = 'mask' entities['space'] = 'MNI152' pattern = os.path.join( subj_deriv, "{datatype}", 
"sub-{subject}[_ses-{session}][_space-{space}][_desc-{desc}]_{suffix}.{extension}" ) mask_prefix = layout.build_path(entities, pattern, validate=False) command = '3dAutomask -prefix {prefix} {input}'.format( input=mean_mask_prefix, prefix=mask_prefix) print(command) os.system(command) # Clean files removed = " ".join([b.filename[:-7] + '_al+orig.*' for b in bold]) command = "rm " + removed print(command) os.system(command) command = "rm __tt_*.*" print(command) os.system(command) command = "rm malldump.*" print(command) os.system(command) # Put files in BIDS header = [ 'trans_x', 'trans_y', 'trans_z', 'rot_x', 'rot_y', 'rot_z' ] motion_ordered = list() for bold in ordered_bold: motion_fname = bold.filename[:-7] + "_vr_motion.1D" motion = np.genfromtxt(motion_fname) motion_ordered.append(motion_fname) entities = bold.get_entities() entities['suffix'] = 'motion' entities['desc'] = 'volreg' entities['extension'] = 'tsv' pattern = os.path.join( subj_deriv, "{datatype}", "sub-{subject}_ses-{session}_task-{task}_run-{run:02d}_desc-{desc}_{suffix}.{extension}" ) motion_bids = pattern.format(**entities) motion = motion[:, [3, 4, 5, 0, 1, 2]] np.savetxt(motion_bids, motion, fmt="%f", delimiter="\t", header="\t".join(header)) print("rm " + motion_fname) os.system("rm " + motion_fname) for desc in ['mat', "reg_mat", "tlrc_mat"]: affine_fname = bold.filename[:-7] + "_al_" + desc + ".aff12.1D" entities = bold.get_entities() entities['suffix'] = 'affine' entities['desc'] = desc.replace("_", "") entities['extension'] = 'tsv' pattern = os.path.join( subj_deriv, "{datatype}", "sub-{subject}_ses-{session}_task-{task}_run-{run:02d}_desc-{desc}_{suffix}.{extension}" ) affine_bids = pattern.format(**entities) command = "mv {0} {1}".format(affine_fname, affine_bids) print(command) os.system(command) afni_bold = bold.filename[:-7] + "_tlrc_al+tlrc" entities = bold.get_entities() entities['desc'] = "afniproc" entities['extension'] = 'nii.gz' entities['space'] = 'MNI152' pattern = os.path.join( subj_deriv, "{datatype}", "sub-{subject}_ses-{session}_task-{task}_run-{run:02d}_space-{space}_desc-{desc}_{suffix}.{extension}" ) afni_bids = pattern.format(**entities) command = "3dcopy {0} {1}".format(afni_bold, afni_bids) print(command) os.system(command) print("rm {}*".format(afni_bold)) os.system("rm {}*".format(afni_bold)) # Create confound regressors # Motion motion_files = list() for run in runs: f = layout.get(subject=subj, session=session, task=session, run=run, suffix='motion') if len(f) != 0: motion_files.append(f[0]) motion_df = [ pd.read_csv(m.path, delimiter="\t") for m in motion_files ] motion_demean = [m - m.mean(0) for m in motion_df] motion_demean = pd.concat(motion_demean) entities = motion_files[0].get_entities() entities['desc'] = 'demean' pattern = os.path.join( subj_deriv, "{datatype}", "sub-{subject}_ses-{session}_desc-{desc}_{suffix}.{extension}") demean_fname = pattern.format(**entities) motion_demean.to_csv(demean_fname, header=False, index=False, sep="\t") motion_deriv = [m.diff() for m in motion_df] motion_deriv = [m.fillna(0) for m in motion_deriv] motion_deriv = [m - m.mean(0) for m in motion_deriv] motion_deriv = pd.concat(motion_deriv) entities['desc'] = 'deriv' deriv_fname = pattern.format(**entities) motion_deriv.to_csv(deriv_fname, header=False, index=False, sep="\t") tr_counts = [m.shape[0] for m in motion_df] for j, t in enumerate(tr_counts): command = "1dBport -nodata {ntr} 1 -band 0.01 999 -invert -nozero > bpass.1D".format( ntr=t) print(command) os.system(command) command = 
"1d_tool.py -infile bpass.1D -pad_into_many_runs {run:1d} {n_runs}"+\ " -set_run_lengths {tr_counts} -write bpass.{run:02d}.1D" command = command.format(run=j + 1, n_runs=str(len(tr_counts)), tr_counts=" ".join( [str(t) for t in tr_counts])) print(command) os.system(command) entities['desc'] = 'bpass' entities['suffix'] = 'timeseries' entities['extension'] = '1D' bpass_fname = pattern.format(**entities) command = "1dcat bpass.*.1D > {}".format(bpass_fname) print(command) os.system(command) command = "rm bpass*" print(command) os.system(command)
def main(**args):
    outfiles = ['fitts', 'errts', 'stats', 'betas']

    path = args['path']
    pipeline = args['pipeline']

    command = '3dDeconvolve -input {files} -jobs {n_jobs} -polort {polort} -float {confounds} {events_string} ' + \
              ' -mask {mask} -allzero_OK -fout -tout -x1D {design_matrix_txt} -xjpeg {design_matrix_jpg} -xsave ' + \
              '-fitts {fitts} -errts {errts} -bucket {stats} -cbucket {betas} -rout -gltsym "SYM: RESP+L -RESP+R" -glt_label 1 RespLvsRespR'

    extra_event = process_extraevent_arg(args['extra_event'])

    derivatives = os.path.join(path, "derivatives", pipeline)
    print("mkdir -p {}".format(derivatives))
    os.system("mkdir -p {}".format(derivatives))

    derivatives_pattern = os.path.join(derivatives, 'sub-{subject}', "ses-{session}")
    pattern = os.path.join(
        derivatives_pattern, "{datatype}",
        "sub-{subject}[_ses-{session}][_space-{space}][_desc-{desc}]_{suffix}.{extension}"
    )

    layout = BIDSLayout(path, derivatives=True)
    subjects = layout.get_subjects()
    subjects.remove('lormat')
    sessions = layout.get_sessions()

    # TODO: Check if there are sessions
    for session in sessions:
        for subj in subjects:
            deriv_dir = derivatives_pattern.format(session=session, subject=subj)
            print("mkdir -p {}".format(deriv_dir))
            os.system("mkdir -p {}".format(deriv_dir))

            # Create func
            func_dir = os.path.join(derivatives_pattern,
                                    "{datatype}").format(session=session,
                                                         subject=subj,
                                                         datatype='func')
            print("mkdir -p {}".format(func_dir))
            os.system("mkdir -p {}".format(func_dir))

            # Main command
            files = layout.get(subject=subj, session=session, task=session,
                               desc='afniproc', extension='nii.gz')
            entities = files[0].get_entities()
            files = " ".join(f.path for f in files)
            args['files'] = files

            confounds = ''
            for desc in ['bpass', 'demean']:
                ort_files = layout.get(subject=subj, session=session, desc=desc)
                confounds += '-ortvec {} {} '.format(ort_files[0].path, desc)
            args['confounds'] = confounds

            # Stimuli
            stims = bids2afni_events(subj, session, layout, pattern,
                                     extra_event=extra_event)
            write_afni(stims)
            args['events_string'] = stims_times(stims)

            # Mask
            mask = layout.get(subject=subj, session=session, suffix='mask',
                              extension='nii.gz')
            args['mask'] = mask[0].path

            # Buckets
            for desc in outfiles:
                entities['desc'] = pipeline
                entities['suffix'] = desc
                args[desc] = layout.build_path(entities, pattern, validate=False)

            for extension in ['jpg', 'txt']:
                entities['suffix'] = 'dmatrix'
                entities['extension'] = extension
                args['design_matrix_' + extension] = layout.build_path(
                    entities, pattern, validate=False)

            print(command.format(**args))
            os.system(command.format(**args))
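# Hedged usage sketch for the 3dDeconvolve driver above: main() expects a
# keyword dict carrying at least the keys read inside the function
# (path, pipeline, extra_event) plus the n_jobs and polort values interpolated
# into the 3dDeconvolve command. All values below are placeholders.
deconvolve_args = {
    'path': '/data/bids_sherlock',   # hypothetical BIDS root with derivatives
    'pipeline': 'deconvolve',        # name of the derivatives subfolder to create
    'extra_event': None,             # whatever process_extraevent_arg accepts
    'n_jobs': 8,
    'polort': 4,
}
main(**deconvolve_args)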
def bidsmri2project(directory, args): # initialize empty cde graph...it may get replaced if we're doing variable to term mapping or not cde=Graph() # Parse dataset_description.json file in BIDS directory if (os.path.isdir(os.path.join(directory))): try: with open(os.path.join(directory,'dataset_description.json')) as data_file: dataset = json.load(data_file) except OSError: logging.critical("Cannot find dataset_description.json file which is required in the BIDS spec") exit("-1") else: logging.critical("Error: BIDS directory %s does not exist!" %os.path.join(directory)) exit("-1") # create project / nidm-exp doc project = Project() # if there are git annex sources then add them num_sources=addGitAnnexSources(obj=project.get_uuid(),bids_root=directory) # else just add the local path to the dataset if num_sources == 0: project.add_attributes({Constants.PROV['Location']:"file:/" + directory}) # add various attributes if they exist in BIDS dataset for key in dataset: # if key from dataset_description file is mapped to term in BIDS_Constants.py then add to NIDM object if key in BIDS_Constants.dataset_description: if type(dataset[key]) is list: project.add_attributes({BIDS_Constants.dataset_description[key]:"".join(dataset[key])}) else: project.add_attributes({BIDS_Constants.dataset_description[key]:dataset[key]}) # get BIDS layout bids_layout = BIDSLayout(directory) # create empty dictinary for sessions where key is subject id and used later to link scans to same session as demographics session={} participant={} # Parse participants.tsv file in BIDS directory and create study and acquisition objects if os.path.isfile(os.path.join(directory,'participants.tsv')): with open(os.path.join(directory,'participants.tsv')) as csvfile: participants_data = csv.DictReader(csvfile, delimiter='\t') # logic to map variables to terms. # first iterate over variables in dataframe and check which ones are already mapped as BIDS constants and which are not. 
For those that are not # we want to use the variable-term mapping functions to help the user do the mapping # iterate over columns mapping_list=[] column_to_terms={} for field in participants_data.fieldnames: # column is not in BIDS_Constants if not (field in BIDS_Constants.participants): # add column to list for column_to_terms mapping mapping_list.append(field) #if user didn't supply a json mapping file but we're doing some variable-term mapping create an empty one for column_to_terms to use if args.json_map == False: #defaults to participants.json because here we're mapping the participants.tsv file variables to terms # if participants.json file doesn't exist then run without json mapping file if not os.path.isfile(os.path.join(directory,'participants.json')): #maps variables in CSV file to terms temp=DataFrame(columns=mapping_list) if args.no_concepts: column_to_terms,cde = map_variables_to_terms(directory=directory,assessment_name='participants.tsv', df=temp,output_file=os.path.join(directory,'participants.json'),bids=True,associate_concepts=False) else: column_to_terms,cde = map_variables_to_terms(directory=directory,assessment_name='participants.tsv', df=temp,output_file=os.path.join(directory,'participants.json'),bids=True) else: #maps variables in CSV file to terms temp=DataFrame(columns=mapping_list) if args.no_concepts: column_to_terms,cde = map_variables_to_terms(directory=directory, assessment_name='participants.tsv', df=temp, output_file=os.path.join(directory,'participants.json'),json_file=os.path.join(directory,'participants.json'),bids=True,associate_concepts=False) else: column_to_terms,cde = map_variables_to_terms(directory=directory, assessment_name='participants.tsv', df=temp, output_file=os.path.join(directory,'participants.json'),json_file=os.path.join(directory,'participants.json'),bids=True) else: #maps variables in CSV file to terms temp=DataFrame(columns=mapping_list) if args.no_concepts: column_to_terms, cde = map_variables_to_terms(directory=directory, assessment_name='participants.tsv', df=temp, output_file=os.path.join(directory,'participants.json'),json_file=args.json_map,bids=True,associate_concepts=False) else: column_to_terms, cde = map_variables_to_terms(directory=directory, assessment_name='participants.tsv', df=temp, output_file=os.path.join(directory,'participants.json'),json_file=args.json_map,bids=True) for row in participants_data: #create session object for subject to be used for participant metadata and image data #parse subject id from "sub-XXXX" string temp = row['participant_id'].split("-") #for ambiguity in BIDS datasets. 
Sometimes participant_id is sub-XXXX and othertimes it's just XXXX if len(temp) > 1: subjid = temp[1] else: subjid = temp[0] logging.info(subjid) session[subjid] = Session(project) #add acquisition object acq = AssessmentAcquisition(session=session[subjid]) acq_entity = AssessmentObject(acquisition=acq) participant[subjid] = {} participant[subjid]['person'] = acq.add_person(attributes=({Constants.NIDM_SUBJECTID:row['participant_id']})) # add nfo:filename entry to assessment entity to reflect provenance of where this data came from acq_entity.add_attributes({Constants.NIDM_FILENAME:getRelPathToBIDS(os.path.join(directory,'participants.tsv'),directory)}) #acq_entity.add_attributes({Constants.NIDM_FILENAME:os.path.join(directory,'participants.tsv')}) #add qualified association of participant with acquisition activity acq.add_qualified_association(person=participant[subjid]['person'],role=Constants.NIDM_PARTICIPANT) # print(acq) # if there are git annex sources for participants.tsv file then add them num_sources=addGitAnnexSources(obj=acq_entity.get_uuid(),bids_root=directory) # else just add the local path to the dataset if num_sources == 0: acq_entity.add_attributes({Constants.PROV['Location']:"file:/" + os.path.join(directory,'participants.tsv')}) # if there's a JSON sidecar file then create an entity and associate it with all the assessment entities if os.path.isfile(os.path.join(directory,'participants.json')): json_sidecar = AssessmentObject(acquisition=acq) json_sidecar.add_attributes({PROV_TYPE:QualifiedName(Namespace("bids",Constants.BIDS),"sidecar_file"), Constants.NIDM_FILENAME: getRelPathToBIDS(os.path.join(directory,'participants.json'),directory)}) # add Git Annex Sources # if there are git annex sources for participants.tsv file then add them num_sources=addGitAnnexSources(obj=json_sidecar.get_uuid(),filepath=os.path.join(directory,'participants.json'),bids_root=directory) # else just add the local path to the dataset if num_sources == 0: json_sidecar.add_attributes({Constants.PROV['Location']:"file:/" + os.path.join(directory,'participants.json')}) # check if json_sidecar entity exists and if so associate assessment entity with it if 'json_sidecar' in locals(): #connect json_entity with acq_entity acq_entity.add_attributes({Constants.PROV["wasInfluencedBy"]:json_sidecar}) for key,value in row.items(): if not value: continue #for variables in participants.tsv file who have term mappings in BIDS_Constants.py use those, add to json_map so we don't have to map these if user #supplied arguments to map variables if key in BIDS_Constants.participants: # WIP # Here we are adding to CDE graph data elements for BIDS Constants that remain fixed for each BIDS-compliant dataset if not (BIDS_Constants.participants[key] == Constants.NIDM_SUBJECTID): # create a namespace with the URL for fixed BIDS_Constants term # item_ns = Namespace(str(Constants.BIDS.namespace.uri)) # add prefix to namespace which is the BIDS fixed variable name # cde.bind(prefix="bids", namespace=item_ns) # ID for BIDS variables is always the same bids:[bids variable] cde_id = Constants.BIDS[key] # add the data element to the CDE graph cde.add((cde_id,RDF.type, Constants.NIDM['DataElement'])) cde.add((cde_id,RDF.type, Constants.PROV['Entity'])) # add some basic information about this data element cde.add((cde_id,Constants.RDFS['label'],Literal(BIDS_Constants.participants[key].localpart))) cde.add((cde_id,Constants.NIDM['isAbout'],URIRef(BIDS_Constants.participants[key].uri))) 
cde.add((cde_id,Constants.NIDM['source_variable'],Literal(key))) cde.add((cde_id,Constants.NIDM['description'],Literal("participant/subject identifier"))) cde.add((cde_id,Constants.RDFS['comment'],Literal("BIDS participants_id variable fixed in specification"))) cde.add((cde_id,Constants.RDFS['valueType'],URIRef(Constants.XSD["string"]))) acq_entity.add_attributes({cde_id:Literal(value)}) # if this was the participant_id, we already handled it above creating agent / qualified association # if not (BIDS_Constants.participants[key] == Constants.NIDM_SUBJECTID): # acq_entity.add_attributes({BIDS_Constants.participants[key]:value}) # else if user added -mapvars flag to command line then we'll use the variable-> term mapping procedures to help user map variables to terms (also used # in CSV2NIDM.py) else: # WIP: trying to add new support for CDEs... add_attributes_with_cde(prov_object=acq_entity,cde=cde,row_variable=key,value=value) # if key in column_to_terms: # acq_entity.add_attributes({QualifiedName(provNamespace(Core.safe_string(None,string=str(key)), column_to_terms[key]["url"]), ""):value}) # else: # acq_entity.add_attributes({Constants.BIDS[key.replace(" ", "_")]:value}) # create acquisition objects for each scan for each subject # loop through all subjects in dataset for subject_id in bids_layout.get_subjects(): logging.info("Converting subject: %s" %subject_id) # skip .git directories...added to support datalad datasets if subject_id.startswith("."): continue # check if there are a session numbers. If so, store it in the session activity and create a new # sessions for these imaging acquisitions. Because we don't know which imaging session the root # participants.tsv file data may be associated with we simply link the imaging acquisitions to different # sessions (i.e. 
the participants.tsv file goes into an AssessmentAcquisition and linked to a unique # sessions and the imaging acquisitions go into MRAcquisitions and has a unique session) imaging_sessions = bids_layout.get_sessions(subject=subject_id) # if session_dirs has entries then get any metadata about session and store in session activity # bids_layout.get(subject=subject_id,type='session',extensions='.tsv') # bids_layout.get(subject=subject_id,type='scans',extensions='.tsv') # bids_layout.get(extensions='.tsv',return_type='obj') # loop through each session if there is a sessions directory if len(imaging_sessions) > 0: for img_session in imaging_sessions: # create a new session ses = Session(project) # add session number as metadata ses.add_attributes({Constants.BIDS['session_number']:img_session}) addimagingsessions(bids_layout=bids_layout,subject_id=subject_id,session=ses,participant=participant, directory=directory,img_session=img_session) # else we have no ses-* directories in the BIDS layout addimagingsessions(bids_layout=bids_layout,subject_id=subject_id,session=Session(project),participant=participant, directory=directory) # Added temporarily to support phenotype files # for each *.tsv / *.json file pair in the phenotypes directory # WIP: ADD VARIABLE -> TERM MAPPING HERE for tsv_file in glob.glob(os.path.join(directory,"phenotype","*.tsv")): # for now, open the TSV file, extract the row for this subject, store it in an acquisition object and link to # the associated JSON data dictionary file with open(tsv_file) as phenofile: pheno_data = csv.DictReader(phenofile, delimiter='\t') for row in pheno_data: subjid = row['participant_id'].split("-") if not subjid[1] == subject_id: continue else: # add acquisition object acq = AssessmentAcquisition(session=session[subjid[1]]) # add qualified association with person acq.add_qualified_association(person=participant[subject_id]['person'],role=Constants.NIDM_PARTICIPANT) acq_entity = AssessmentObject(acquisition=acq) for key,value in row.items(): if not value: continue # we're using participant_id in NIDM in agent so don't add to assessment as a triple. # BIDS phenotype files seem to have an index column with no column header variable name so skip those if ((not key == "participant_id") and (key != "")): # for now we're using a placeholder namespace for BIDS and simply the variable names as the concept IDs.. 
acq_entity.add_attributes({Constants.BIDS[key]:value}) # link TSV file acq_entity.add_attributes({Constants.NIDM_FILENAME:getRelPathToBIDS(tsv_file,directory)}) #acq_entity.add_attributes({Constants.NIDM_FILENAME:tsv_file}) # if there are git annex sources for participants.tsv file then add them num_sources=addGitAnnexSources(obj=acq_entity.get_uuid(),bids_root=directory) # else just add the local path to the dataset if num_sources == 0: acq_entity.add_attributes({Constants.PROV['Location']:"file:/" + tsv_file}) # link associated JSON file if it exists data_dict = os.path.join(directory,"phenotype",os.path.splitext(os.path.basename(tsv_file))[0]+ ".json") if os.path.isfile(data_dict): # if file exists, create a new entity and associate it with the appropriate activity and a used relationship # with the TSV-related entity json_entity = AssessmentObject(acquisition=acq) json_entity.add_attributes({PROV_TYPE:Constants.BIDS["sidecar_file"], Constants.NIDM_FILENAME: getRelPathToBIDS(data_dict,directory)}) # add Git Annex Sources # if there are git annex sources for participants.tsv file then add them num_sources=addGitAnnexSources(obj=json_entity.get_uuid(),filepath=data_dict,bids_root=directory) # else just add the local path to the dataset if num_sources == 0: json_entity.add_attributes({Constants.PROV['Location']:"file:/" + data_dict}) #connect json_entity with acq_entity acq_entity.add_attributes({Constants.PROV["wasInfluencedBy"]:json_entity.get_uuid()}) return project, cde
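# Hedged usage sketch for bidsmri2project: `args` is normally the parsed
# command-line namespace of the surrounding CLI; the attribute names below
# (json_map, no_concepts) are the ones referenced in the function body, and
# the dataset path is hypothetical.
from argparse import Namespace

args = Namespace(json_map=False, no_concepts=True)
project, cde = bidsmri2project('/data/bids_dataset', args)
print(len(cde))  # number of triples collected in the CDE graph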
def create_pipeline_SS_TV(bids_dir, work_dir, out_dir, subjects, sessions, mag_match_pattern, phase_match_pattern, mask_match_pattern, keep_unnecessary_outputs, FAST_bias_iters, FAST_bias_lowpass, FAST_num_classes, skip_fast, brain_extract_method, BET_frac, single_subject_custom_mask, freq_weights__snr_window_sz, truncate_echo, SS_TV_lagrange_parameter, B0_dir, scnd_diff_reliability_thresh_noise, trim_radius_sz, scnd_diff_reliability_thresh_trim, skip_qsm, skip_r2star, matlab_executable, mcr_location, run_mode): layout = BIDSLayout(bids_dir) ### CREATE PIPELINE OBJECT pipelineDir = work_dir wf = pe.Workflow(name="SS_TV") wf.base_dir = pipelineDir wf.config['execution']['remove_unnecessary_outputs'] = not keep_unnecessary_outputs ### GET MULTI-ECHO DATA # can we do this more elegantly? first_echo_files = [] for subject in subjects: if layout.get_sessions(subject=subject) == []: if sessions == ['.*']: first_echo_files = first_echo_files + layout.get(subject=subject, modality='anat', extensions='.*part-phase.*echo-0*1.*.nii.*', ) else: print( "Warning: Session filter applied, but subject " + subject + " has no bids session information. This subject has been ignored.") else: for session in sessions: first_echo_files = first_echo_files + layout.get(subject=subject, session=session, modality='anat', extensions='.*part-phase.*echo-0*1.*.nii.*', ) anat_folders = [] for img in first_echo_files: full_dirname = os.path.dirname(img.filename) remove_base_dir = full_dirname.replace(bids_dir, '') remove_leading_slash = remove_base_dir.lstrip(os.sep) anat_folders.append(remove_leading_slash) anat_folders = list(set(anat_folders)) anat_folders.sort() # IdentityInterface is useful for passing subject directory structure to datasink infosource = pe.Node(niu.IdentityInterface(fields=['subject_id']), name="infosource") infosource.iterables = ('subject_id', anat_folders) ### NODES AND PARAMETERS if brain_extract_method == BrainExtractMethod.BIDS: datasource = pe.Node( nio.DataGrabber(infields=['subject_id'], outfields=['phase_images', 'mag_images', 'phase_jsons', 'mag_jsons', 'brain_mask']), name='datasource') datasource.inputs.field_template = dict( phase_images='%s/' + phase_match_pattern + '.nii*', phase_jsons='%s/' + phase_match_pattern + '.json', mag_images='%s/' + mag_match_pattern + '.nii*', mag_jsons='%s/' + mag_match_pattern + '.json', brain_mask='%s/' + mask_match_pattern + '.nii*', ) else: datasource = pe.Node( nio.DataGrabber(infields=['subject_id'], outfields=['phase_images', 'mag_images', 'phase_jsons', 'mag_jsons']), name='datasource') datasource.inputs.field_template = dict( phase_images='%s/' + phase_match_pattern + '.nii*', phase_jsons='%s/' + phase_match_pattern + '.json', mag_images='%s/' + mag_match_pattern + '.nii*', mag_jsons='%s/' + mag_match_pattern + '.json', ) datasource.inputs.sort_filelist = True datasource.inputs.template = "*" datasource.inputs.base_directory = bids_dir # this node must change depending on the scanner vendor susc_phase_preprocess = pe.Node(SiemensPhasePreprocess(), name='susc_phase_preprocess') avg_and_freq_estimate_weights = pe.Node(GetAvgAndSNRMap(), name='avg_and_freq_estimate_weights') avg_and_freq_estimate_weights.inputs.snr_window_sz = freq_weights__snr_window_sz avg_and_freq_estimate_weights.inputs.avg_out_filename = "avg.nii.gz" avg_and_freq_estimate_weights.inputs.snr_map_out_filename = "weights.nii.gz" wf.connect([ (infosource, datasource, [('subject_id', 'subject_id')]), (datasource, avg_and_freq_estimate_weights, [('mag_images', 'mag')]), (datasource, 
susc_phase_preprocess, [('phase_images', 'infiles')]) ]) if brain_extract_method == BrainExtractMethod.BET: brain_extract = pe.Node(fsl.BET(), name='brain_extract_bet') brain_extract.inputs.frac = BET_frac brain_extract.inputs.mask = True brain_extract.inputs.robust = True if skip_fast: # connect avg directly to bet (skip FAST if image uniform enough for brain extraction) wf.connect([ (avg_and_freq_estimate_weights, brain_extract, [('avg_out_filename', 'in_file')]) ]) else: # connect avg to nu correction, connect nu correction to bet """ #spm worked better for varian 7T data #if using spm, these prameters are needed bias_regularization=.001 sampling_distance=2.0 bias_fwhm=30 nonuniformityCorrect_spm=pe.Node(spm.preprocess.Segment(),name='nonuniformityCorrect_spm') nonuniformityCorrect_spm.inputs.bias_regularization=bias_regularization nonuniformityCorrect_spm.inputs.sampling_distance=sampling_distance nonuniformityCorrect_spm.inputs.bias_fwhm=bias_fwhm nonuniformityCorrect_spm.inputs.save_bias_corrected=True """ nonuniformity_correct_fsl = pe.Node(fsl.FAST(), name='nonuniformity_correct_fsl') nonuniformity_correct_fsl.inputs.img_type = 2 # 1 for t1, 2 for t2 nonuniformity_correct_fsl.inputs.bias_iters = FAST_bias_iters # higher for larger nonuniformity nonuniformity_correct_fsl.inputs.bias_lowpass = FAST_bias_lowpass # spm uses 30 nonuniformity_correct_fsl.inputs.number_classes = FAST_num_classes # spm uses 5 nonuniformity_correct_fsl.inputs.output_biasfield = True nonuniformity_correct_fsl.inputs.output_biascorrected = True nonuniformity_correct_fsl.interface.estimated_memory_gb = 10 wf.connect([ # spm requires matlab # (avg_and_freq_estimate_weights, nonuniformityCorrect_spm, [('avgOutFilename', 'data')]), # (nonuniformityCorrect_spm, brain_extract, [('bias_corrected_image', 'in_file')]), (avg_and_freq_estimate_weights, nonuniformity_correct_fsl, [('avg_out_filename', 'in_files')]), (nonuniformity_correct_fsl, brain_extract, [('restored_image', 'in_file')]) ]) elif brain_extract_method == BrainExtractMethod.BIDS: brain_extract = pe.Node( nio.DataGrabber(infields=['subject_id'], outfields=['mask_file']), name='bids_brain_mask') brain_extract.inputs.field_template = dict( mask_file='%s/' + mask_match_pattern + '.nii*', ) brain_extract.inputs.sort_filelist = False brain_extract.inputs.template = "*" brain_extract.inputs.base_directory = bids_dir wf.connect([ (infosource, brain_extract, [('subject_id', 'subject_id')]), ]) elif brain_extract_method == BrainExtractMethod.SINGLE_SUBJECT_FULL_PATH: brain_extract = pe.Node(niu.IdentityInterface(fields=['mask_file']), name="fullpath_brain_mask") brain_extract.inputs.mask_file = single_subject_custom_mask freq_est = pe.Node(EstimateFrequencyFromWrappedPhase(), 'freq_est') freq_est.inputs.truncate_echo = truncate_echo freq_est.inputs.freq_filename = "freq_est.nii.gz" freq_est.interface.estimated_memory_gb = 4 fieldmap_reorient = pe.Node(fsl.Reorient2Std(), name='fieldmap_reorient') datasink = pe.Node(nio.DataSink(), name="datasink") datasink.inputs.base_directory = out_dir + '/qsm_sstv/' datasink.inputs.parameterization = False rename_infosource = pe.Node(replace_slash, "rename_infosource") rename_fieldmap = pe.Node(niu.Rename(format_string="%(subject_id)s-fieldmap", keep_ext=True), "rename_fieldmap") wf.connect([ (susc_phase_preprocess, freq_est, [('outfiles', 'phase')]), (datasource, freq_est, [('phase_jsons', 'json')]), (brain_extract, freq_est, [('mask_file', 'mask')]), (avg_and_freq_estimate_weights, freq_est, [('snr_map_out_filename', 
'weight')]), (freq_est, fieldmap_reorient, [('freq_filename', 'in_file')]), # rename files and data sink (infosource, rename_infosource, [('subject_id', 'filename')]), # fieldmap (rename_infosource, rename_fieldmap, [('renamed', 'subject_id')]), (fieldmap_reorient, rename_fieldmap, [('out_file', 'in_file')]), (rename_fieldmap, datasink, [('out_file', '@')]), (infosource, datasink, [('subject_id', 'container')]), ]) if not (skip_qsm and skip_r2star): trim_mask = pe.Node(TrimMaskUsingReliability(), name='trim_mask') trim_mask.inputs.erosion_sz = trim_radius_sz # in mm trim_mask.inputs.threshold = scnd_diff_reliability_thresh_trim trim_mask.inputs.trimmed_mask_filename = "trim_mask.nii.gz" trim_mask.inputs.reliability_filename = "unreliableMap.nii.gz" trim_mask.interface.estimated_memory_gb = 25 wf.connect([ (freq_est, trim_mask, [('freq_filename', 'phase')]), (brain_extract, trim_mask, [('mask_file', 'mask')]) ]) if not skip_qsm: unreliable_fieldmap_voxels = pe.Node(CalculateReliabilityMask(), name='unreliable_fieldmap_voxels') unreliable_fieldmap_voxels.inputs.threshold = scnd_diff_reliability_thresh_noise unreliable_fieldmap_voxels.inputs.reliability_mask_filename = "unreliableMask.nii.gz" unreliable_fieldmap_voxels.inputs.reliability_filename = "unreliableMap.nii.gz" CF_value = pe.Node(GetCFFromJson, name='CFValue') susceptibility = pe.Node(SS_TV(run_mode, matlab_executable, mcr_location), name='susceptibility') susceptibility.inputs.alpha = SS_TV_lagrange_parameter susceptibility.inputs.B0_dir = B0_dir susceptibility.inputs.susceptibility_filename = 'susceptibilityMap.nii.gz' susceptibility.interface.estimated_memory_gb = 10 QSM_reorient = pe.Node(fsl.Reorient2Std(), name='QSM_reorient') QSM_brain_mask_reorient = pe.Node(fsl.Reorient2Std(), name='QSM_brain_mask_reorient') QSM_noise_mask_reorient = pe.Node(fsl.Reorient2Std(), name='QSM_noise_mask_reorient') rename_QSM = pe.Node(niu.Rename(format_string="%(subject_id)s-QSM", keep_ext=True), "rename_QSM") rename_QSM_brain_mask = pe.Node(niu.Rename(format_string="%(subject_id)s-QSM_brainMask", keep_ext=True), "rename_QSM_brain_mask") rename_QSM_noise_mask = pe.Node(niu.Rename(format_string="%(subject_id)s-QSM_noiseMask", keep_ext=True), "rename_QSM_noise_mask") wf.connect([ (freq_est, unreliable_fieldmap_voxels, [('freq_filename', 'phase')]), (brain_extract, unreliable_fieldmap_voxels, [('mask_file', 'mask')]), (freq_est, susceptibility, [('freq_filename', 'freq_loc')]), (datasource, CF_value, [('mag_jsons', 'filename')]), (unreliable_fieldmap_voxels, susceptibility, [('reliability_mask_filename', 'reliability_mask_loc')]), (trim_mask, susceptibility, [('trimmed_mask_filename', 'mask_loc')]), (CF_value, susceptibility, [('CF_value', 'CF')]), (susceptibility, QSM_reorient, [('susceptibility_filename', 'in_file')]), (trim_mask, QSM_brain_mask_reorient, [('trimmed_mask_filename', 'in_file')]), (unreliable_fieldmap_voxels, QSM_noise_mask_reorient, [('reliability_mask_filename', 'in_file')]), # qsm (rename_infosource, rename_QSM, [('renamed', 'subject_id')]), (QSM_reorient, rename_QSM, [('out_file', 'in_file')]), (rename_QSM, datasink, [('out_file', '@.@qsm')]), # qsm brain mask (rename_infosource, rename_QSM_brain_mask, [('renamed', 'subject_id')]), (QSM_brain_mask_reorient, rename_QSM_brain_mask, [('out_file', 'in_file')]), (rename_QSM_brain_mask, datasink, [('out_file', '@.@qsm_brain')]), # qsm noisey voxels in fieldmap (rename_infosource, rename_QSM_noise_mask, [('renamed', 'subject_id')]), (QSM_noise_mask_reorient, rename_QSM_noise_mask, 
[('out_file', 'in_file')]), (rename_QSM_noise_mask, datasink, [('out_file', '@.@qsm_noise')]), ]) if not skip_r2star: R2Star = pe.Node(CalcR2Star(), 'R2Star') R2Star.inputs.R2star = 'R2star.nii.gz' R2Star.inputs.neg_mask = 'negMask.nii.gz' R2Star.inputs.nan_mask = 'nanMask.nii.gz' # R2Star.interface.estimated_memory_gb = 5 R2star_reorient = pe.Node(fsl.Reorient2Std(), name='R2star_reorient') R2star_fit_reorient = pe.Node(fsl.Reorient2Std(), name='R2star_fit_reorient') R2star_neg_mask_reorient = pe.Node(fsl.Reorient2Std(), name='R2star_neg_mask_reorient') rename_R2star = pe.Node(niu.Rename(format_string="%(subject_id)s-R2star", keep_ext=True), "rename_R2star") rename_R2star_fit = pe.Node(niu.Rename(format_string="%(subject_id)s-R2star_fit", keep_ext=True), "rename_R2star_fit") rename_R2star_neg_mask = pe.Node(niu.Rename(format_string="%(subject_id)s-R2star_negMask", keep_ext=True), "rename_R2star_neg_mask") wf.connect([ (datasource, R2Star, [('mag_images', 'mag')]), (susc_phase_preprocess, R2Star, [('outfiles', 'phase')]), (freq_est, R2Star, [('freq_filename', 'freq_loc')]), (trim_mask, R2Star, [('trimmed_mask_filename', 'mask')]), (datasource, R2Star, [('mag_jsons', 'json')]), (R2Star, R2star_reorient, [('R2star', 'in_file')]), (R2Star, R2star_fit_reorient, [('R2star_fit', 'in_file')]), (R2Star, R2star_neg_mask_reorient, [('neg_mask', 'in_file')]), # r2star (rename_infosource, rename_R2star, [('renamed', 'subject_id')]), (R2star_reorient, rename_R2star, [('out_file', 'in_file')]), (rename_R2star, datasink, [('out_file', '@.@r2star')]), # r2star fit map (rename_infosource, rename_R2star_fit, [('renamed', 'subject_id')]), (R2star_fit_reorient, rename_R2star_fit, [('out_file', 'in_file')]), (rename_R2star_fit, datasink, [('out_file', '@.@r2starfit')]), # r2star negative values that were set to 0 (rename_infosource, rename_R2star_neg_mask, [('renamed', 'subject_id')]), (R2star_neg_mask_reorient, rename_R2star_neg_mask, [('out_file', 'in_file')]), (rename_R2star_neg_mask, datasink, [('out_file', '@.@r2starneg')]), ]) return wf
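# Hedged usage sketch: create_pipeline_SS_TV returns a nipype Workflow, so a
# caller would typically build it from parsed CLI arguments and then execute
# it with one of nipype's plugins. Every value below is an illustrative
# placeholder (not a validated default), and BrainExtractMethod is assumed to
# be importable from the module that defines the pipeline.
wf = create_pipeline_SS_TV(
    bids_dir='/data/bids', work_dir='/scratch/qsm_work',
    out_dir='/data/derivatives', subjects=['01'], sessions=['.*'],
    mag_match_pattern='*part-mag*echo*', phase_match_pattern='*part-phase*echo*',
    mask_match_pattern='*brainmask*', keep_unnecessary_outputs=False,
    FAST_bias_iters=5, FAST_bias_lowpass=20, FAST_num_classes=3, skip_fast=False,
    brain_extract_method=BrainExtractMethod.BET, BET_frac=0.4,
    single_subject_custom_mask=None, freq_weights__snr_window_sz=15,
    truncate_echo=-1, SS_TV_lagrange_parameter=0.35, B0_dir=3,
    scnd_diff_reliability_thresh_noise=1e6, trim_radius_sz=15,
    scnd_diff_reliability_thresh_trim=0.5, skip_qsm=False, skip_r2star=False,
    matlab_executable='matlab', mcr_location=None,
    run_mode='matlab')  # placeholder; must match what the SS_TV interface expects
wf.run(plugin='MultiProc', plugin_args={'n_procs': 4})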
wf_dir = args.wf_base_dir

if args.ants_reg_quick:
    print("Use AntsRegistrationSynQuick for registration")
else:
    print("Use AntsRegistrationSyn for registration")

layout = BIDSLayout(args.bids_dir)
if not subjects:
    subjects = layout.get_subjects(datatype="dwi")
print(f"{len(subjects)} subject(s) found {subjects}")

for subject in subjects:
    print(subject)

    # get sessions
    sessions = layout.get_sessions(subject=subject, datatype="dwi")
    sessions.sort()

    # set up acq for eddy
    if "lhab" in subject:
        acq_str = "0 1 0 {TotalReadoutTime}"
        study = "lhab"
    elif "CC" in subject:
        acq_str = "0 -1 0 0.0684"
        study = "camcan"
    elif "olm" in subject:
        acq_str = "0 1 0 {TotalReadoutTime}"
        study = "olm"
    else:
        raise ValueError("Cannot determine study")

    wfs = []
    struct_params['acquisition'] = args['t1_acquisition']
elif args['fmri_acquisition'] is not None:
    time_series_params['acquisition'] = args['fmri_acquisition']
else:
    data_grabber_node_iterables.append(
        ('acquisition', layout.get_acquisitions()))

if args['session'] is not None:
    struct_params['session'] = args['session']
    time_series_params['session'] = args['session']
elif args['t1_session'] is not None:
    struct_params['session'] = args["t1_session"]
elif args['fmri_session'] is not None:
    time_series_params['session'] = args['fmri_session']
else:
    data_grabber_node_iterables.append(('session', layout.get_sessions()))

if args['t1_temp'] is not None:
    rcfe_setup.config['registration'] = rcfe_setup.Reg.t1
if args['epi_temp'] is not None:
    rcfe_setup.config['registration'] = rcfe_setup.Reg.epi
if args['results_dir'] is not None:
    rcfe_setup.config['results_directory'] = args['results_dir']
if args['draw_graphs'] == 0:
    rcfe_setup.config['graphs'] = False
if args['bias_correction'] == 0:
    rcfe_setup.config['bias_correction'] = False

from rcfe_pipeline_setup import full_process
from rcfe_pipeline_setup import input_handler_node
from rcfe_pipeline_setup import accept_input
def get_files(subject_id, session, task, raw_data_dir, preprocessed_data_dir,
              space=None, run=[], strict=True, **kwargs):
    """
    Given some information, retrieve all the files and metadata from a
    BIDS-formatted dataset that will be passed to the analysis pipeline.
    """
    from bids import BIDSLayout

    # only the raw files have the correct metadata, eg TR, and the event files are here
    raw_layout = BIDSLayout(raw_data_dir, validate=False, derivatives=False)
    preproc_layout = BIDSLayout(preprocessed_data_dir, validate=False)

    subjects = preproc_layout.get_subjects()
    assert subject_id in subjects and subject_id in raw_layout.get_subjects(), \
        "Subject not found!"

    sessions = preproc_layout.get_sessions()
    assert session in sessions, "Session not found!"

    tasks = preproc_layout.get_tasks()
    assert task in tasks, "Task not found!"

    if space == "None":
        space = None
    if space is None:
        print("Space is None")
        bolds = sorted([
            f for f in preproc_layout.get(subject=subject_id, session=session,
                                          task=task, run=run, suffix='bold',
                                          extension=['nii.gz'],
                                          return_type='file')
        ])
    else:
        bolds = sorted([
            f for f in preproc_layout.get(subject=subject_id, session=session,
                                          task=task, run=run, suffix='bold',
                                          extension=['nii.gz'],
                                          return_type='file')
            if f"space-{space}" in f
        ])
    print(f"BOLDS: {len(bolds)}\n{bolds}")

    if space is None:
        masks = sorted([
            f for f in preproc_layout.get(subject=subject_id, suffix='mask',
                                          session=session, task=task,
                                          extension=['nii.gz'],
                                          return_type='file')
        ])
        if not masks:
            masks = sorted([
                f for f in preproc_layout.get(subject=subject_id, suffix='mask',
                                              session=session,
                                              extension=['nii.gz'],
                                              return_type='file')
            ])
    else:
        masks = sorted([
            f for f in preproc_layout.get(subject=subject_id, suffix='mask',
                                          session=session, task=task,
                                          extension=['nii.gz'],
                                          return_type='file')
            if f"space-{space}" in f
        ])
        if not masks:
            masks = sorted([
                f for f in preproc_layout.get(subject=subject_id, suffix='mask',
                                              session=session,
                                              extension=['nii.gz'],
                                              return_type='file')
                if f"space-{space}" in f
            ])
    if len(masks) == 1:
        # there is only one mask and it is to be used for all runs
        masks = masks * len(bolds)
    print(f"Masks: {len(masks)}\n{masks}")

    eventfiles = sorted(
        raw_layout.get(subject=subject_id, suffix='events', task=task,
                       session=session, run=run, extension=['tsv'],
                       return_type='file'))
    print(f"Eventfiles: {len(eventfiles)}\n{eventfiles}")

    raw_bolds = sorted(
        raw_layout.get(subject=subject_id, suffix='bold', task=task,
                       session=session, run=run, extension=['nii.gz'],
                       return_type='file'))
    TRs = [raw_layout.get_metadata(f)['RepetitionTime'] for f in raw_bolds]
    print(TRs, len(TRs))

    confounds = sorted(
        preproc_layout.get(subject=subject_id, suffix="regressors", task=task,
                           session=session, run=run, extension=['tsv'],
                           return_type='file'))
    print(f"Confounds: {len(confounds)}\n{confounds}")
    if not confounds:
        confounds = [''] * len(bolds)

    # print(list(zip(bolds, masks, eventfiles, TRs)))

    # edit 11/9/18 - remove assert on event files, since some early hemifield
    # scans don't have it, but warn!
    if len(eventfiles) != len(bolds):
        print("Some functional runs do not have corresponding event files!")

    # all runs for a particular task must have same TR
    assert TRs.count(TRs[0]) == len(TRs), "Not all TRs are the same!"

    if strict:
        # used to also check for ==len(confounds)
        assert len(bolds) == len(masks) > 0, "Input lists are not the same length!"

    TR = TRs[0]

    return bolds, masks, eventfiles, TR, confounds
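# Hedged usage sketch for get_files: the directories, task and space below are
# placeholders. The returned lists are aligned per run, so they can be zipped
# together when building the analysis pipeline.
bolds, masks, events, TR, confounds = get_files(
    subject_id='01', session='01', task='hemifield',
    raw_data_dir='/data/bids_raw',
    preprocessed_data_dir='/data/bids_raw/derivatives/fmriprep',
    space='MNI152NLin2009cAsym')
for bold, mask, ev, conf in zip(bolds, masks, events, confounds):
    print(bold, mask, ev, conf, TR)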
Created on Wed Mar 6 09:43:43 2019

@author: Or Duek
Small script that will remove (MB4iPAT2) from filenames.
Script was replaced and merged with creatBIDS.py
"""

import os
from bids import BIDSLayout

data_dir = '/media/Data/rcfTest'
# data_dir = '/media/Data/kpe_forFmriPrep'

layout = BIDSLayout(data_dir)
layout.get_sessions()
# maybe need to change the way we look for all bold files with these parentheses
source_epi = layout.get()  # , extension='nii.gz')[0].filename

for i in source_epi:
    a = i.filename
    # print(i.filename)
    if a.find('(MB4iPAT2)') != -1:
        print("We have found an issue with ", a)
        # this is the part that will be omitted from the file name.
        # If you have an extra - you should add that too.
        b = a.split('(MB4iPAT2)')
        c = b[0] + b[1]  # combine together
        # change filename
        os.rename(a, c)
if args.freesurfer_dir:
    freesurfer_dir = args.freesurfer_dir
else:
    freesurfer_dir = os.path.join(args.out_dir, "freesurfer")
out_dir = os.path.join(args.out_dir, "baracus")

if not os.path.isdir(freesurfer_dir):
    os.makedirs(freesurfer_dir)
if not os.path.isdir(out_dir):
    os.makedirs(out_dir)

model_dir = resource_filename(Requirement.parse("baracus"), 'models')

run("bids-validator " + args.bids_dir)

layout = BIDSLayout(args.bids_dir)
truly_longitudinal_study = True if len(layout.get_sessions()) > 1 else False

subjects_to_analyze, sessions_to_analyze, freesurfer_subjects_to_analyze = \
    get_subjects_session(layout, args.participant_label, truly_longitudinal_study)

if args.analysis_level == "participant":
    data_files = run_prepare_all(args.bids_dir, freesurfer_dir, out_dir,
                                 subjects_to_analyze, sessions_to_analyze,
                                 args.n_cpus, args.license_key,
                                 args.skip_missing)

    ### REGRESS OUT SITE HERE ###
    if args.confound_file:
        confound_file = args.confound_file
        remove_confounds(data_files, confound_file)
def generate_inputs( bids_dir, pybids_inputs, derivatives=False, pybids_config=None, search_terms=None, limit_to=None, participant_label=None, exclude_participant_label=None, ): """Dynamically generate snakemake inputs using pybids_inputs dict, and pybids to parse the bids dataset. Parameters ---------- bids_dir : str Path to bids directory pybids_inputs : dict Configuration for bids inputs, with keys as the names (``str``) Nested `dicts` with the following required keys: * ``"filters"``: Dictionary containing keyword arguments that will be passed to pybids ``get()``. * ``"wildcards"``: List of (str) bids tags to include as wildcards in snakemake. At minimum this should usually include ``['subject','session']``, plus any other wildcards that you may want to make use of in your snakemake workflow, or want to retain in the output paths. Any wildcards in this list that are not in the filename will just be ignored. Returns ------- dict: The dict returned by this functions contains seven items. Each of the following four items is a dict containing one item for each modality described by ``pybids_inputs``. * ``"input_path"``: String with a wildcard-filled path that matches the images for this modality. * ``"input_zip_lists"``: Dictionary where each key is a wildcard entity and each value is a list of the values found for that entity. Each of these lists has length equal to the number of images matched for this modality, so they can be zipped together to get a list of the wildcard values for each image. * ``"input_lists"``: Dictionary where each key is a wildcard entity and each value is a list of the unique values found for that entity. These lists may not be the same length. * ``"input_wildcards"``: Dictionary where each key is the name of a wildcard entity, and each value is the Snakemake wildcard used for that entity. Then there are three more top-level entries in the dictionary: * ``"subjects"``: A list of the subjects in the dataset. * ``"sessions"``: A list of the sessions in the dataset. * ``"subj_wildcards"``: The subject and session wildcards applicable to this dataset. ``{"subject": "{subject}"}`` if there is only one session, ``{"subject": "{subject}", "session": "{session}"}`` if there are multiple sessions. 
Notes ----- As an example, consider the following BIDS dataset:: bids-example/ ├── dataset_description.json ├── participants.tsv ├── README └── sub-control01 ├── anat │ ├── sub-control01_T1w.json │ ├── sub-control01_T1w.nii.gz │ ├── sub-control01_T2w.json │ └── sub-control01_T2w.nii.gz ├── dwi │ ├── sub-control01_dwi.bval │ ├── sub-control01_dwi.bvec │ └── sub-control01_dwi.nii.gz ├── fmap │ ├── sub-control01_magnitude1.nii.gz │ ├── sub-control01_phasediff.json │ ├── sub-control01_phasediff.nii.gz │ └── sub-control01_scans.tsv └── func ├── sub-control01_task-nback_bold.json ├── sub-control01_task-nback_bold.nii.gz ├── sub-control01_task-nback_events.tsv ├── sub-control01_task-nback_physio.json ├── sub-control01_task-nback_physio.tsv.gz ├── sub-control01_task-nback_sbref.nii.gz ├── sub-control01_task-rest_bold.json ├── sub-control01_task-rest_bold.nii.gz ├── sub-control01_task-rest_physio.json └── sub-control01_task-rest_physio.tsv.gz With the following ``pybids_inputs`` defined in the config file:: pybids_inputs: bold: filters: suffix: 'bold' extension: '.nii.gz' datatype: 'func' wildcards: - subject - session - acquisition - task - run Then ``generate_inputs(bids_dir, pybids_input)`` would return the following dictionary:: { "input_path": { "bold": "bids-example/sub-{subject}/func/sub-{subject}_task-{task}_bold.nii.gz" }, "input_zip_lists": { "bold": { "subject": ["control01", "control01"], "task": ["nback", "rest"] } }, "input_lists": { "bold": { "subject": ["control01"], "task": ["nback", "rest"] } }, "input_wildcards": { "bold": { "subject": "{subject}", "task": "{task}" } }, "subjects": ["subject01"], "sessions": [], "subj_wildcards": {"subject": "{subject}"} } """ # noqa search_terms = _generate_search_terms(participant_label, exclude_participant_label) if os.path.exists(bids_dir): # generate inputs based on config layout = BIDSLayout( bids_dir, derivatives=derivatives, validate=False, # In the next version of pybids, config will accept Paths, so we won't # need this long stringify line config=str(pybids_config) if pybids_config is not None else pybids_config, indexer=BIDSLayoutIndexer(validate=False, index_metadata=False), ) else: _logger.info( "bids_dir does not exist, skipping PyBIDS and using " "custom file paths only" ) layout = None # this will populate input_path, input_lists, input_zip_lists, and # input_wildcards inputs_config_dict = _get_lists_from_bids( bids_layout=layout, pybids_inputs=pybids_inputs, limit_to=limit_to, **search_terms, ) if layout is None: # if no layout, then use subjects/sessions from --path vars subjects = [] sessions = [] for input_type in inputs_config_dict["input_lists"]: subj_set = set(inputs_config_dict["input_lists"][input_type]["subject"]) # filter the list of subjects with participant_label if participant_label is not None: subj_set = set.intersection(subj_set, set(participant_label)) # TODO: need to also remove subjects based on exclude_participant_label # replace with filtered list inputs_config_dict["input_lists"][input_type]["subject"] = list(subj_set) # add to set of subjects from all input_types subjects.append(subj_set) if "session" in (inputs_config_dict["input_lists"][input_type].keys()): sessions.append( {inputs_config_dict["input_lists"][input_type]["session"]} ) else: sessions.append(set([])) # take set intersection of all input types inputs_config_dict["subjects"] = list(set.intersection(*subjects)) inputs_config_dict["sessions"] = list(set.intersection(*sessions)) else: # populate subjects, sessions and subj_wildcards in the config 
inputs_config_dict["subjects"] = layout.get_subjects(**search_terms) inputs_config_dict["sessions"] = layout.get_sessions(**search_terms) if len(inputs_config_dict["sessions"]) == 0: inputs_config_dict["subj_wildcards"] = {"subject": "{subject}"} else: inputs_config_dict["subj_wildcards"] = { "subject": "{subject}", "session": "{session}", } return inputs_config_dict