def create_bids_datasource(data_dir):
    """Create a BIDSDataGrabber node for T1w images, iterating over
    subjects/sessions when more than one is present."""
    bids_datasource = pe.Node(interface=nio.BIDSDataGrabber(),
                              name='bids_datasource')
    bids_datasource.inputs.base_dir = data_dir
    bids_datasource.inputs.output_query = {
        'T1': {
            "datatype": "anat",
            "suffix": "T1w",
            "extensions": ["nii", ".nii.gz"]
        }
    }

    layout = BIDSLayout(data_dir)
    print(layout)
    print(layout.get_subjects())
    print(layout.get_sessions())

    iterables = []

    if len(layout.get_subjects()) == 1:
        bids_datasource.inputs.subject = layout.get_subjects()[0]
    else:
        # NB: only the first two subjects are iterated here
        iterables.append(('subject', layout.get_subjects()[:2]))

    if len(layout.get_sessions()) == 1:
        bids_datasource.inputs.session = layout.get_sessions()[0]
    else:
        # NB: only the first two sessions are iterated here
        iterables.append(('session', layout.get_sessions()[:2]))

    if len(iterables):
        bids_datasource.iterables = iterables

    return bids_datasource
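# --- Usage sketch (not from the original source): building and running a
# --- tiny workflow around create_bids_datasource. "/data/bids" is a
# --- hypothetical dataset path; the imports match those assumed above.
import nipype.pipeline.engine as pe

datasource = create_bids_datasource("/data/bids")
wf = pe.Workflow(name="datagrab_example")
wf.base_dir = "/tmp/nipype_wf"  # hypothetical working directory
wf.add_nodes([datasource])
wf.run()  # expands iterables over subjects/sessions when more than one exists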
def create_datasource_indiv_params(data_dir, indiv_params, subjects=None,
                                   sessions=None, acquisitions=None):
    """Create a datasource node that has iterables following the BIDS
    format, including an indiv_params file."""
    bids_datasource = pe.Node(interface=BIDSDataGrabberParams(indiv_params),
                              name='bids_datasource')
    bids_datasource.inputs.base_dir = data_dir
    bids_datasource.inputs.output_query = {
        'T1': {
            "datatype": "anat",
            "suffix": "T1w",
            "extensions": ["nii", ".nii.gz"]
        },
        'T2': {
            "datatype": "anat",
            "suffix": "T2w",
            "extensions": ["nii", ".nii.gz"]
        }
    }

    layout = BIDSLayout(data_dir)

    # Verbose
    print("BIDS layout:", layout)
    print("\t", layout.get_subjects())
    print("\t", layout.get_sessions())

    if subjects is None:
        subjects = layout.get_subjects()
    if sessions is None:
        sessions = layout.get_sessions()

    iterables = []
    iterables.append(('subject', subjects))
    iterables.append(('session', sessions))

    if acquisitions is not None:
        iterables.append(('acquisition', acquisitions))

    bids_datasource.iterables = iterables

    return bids_datasource
# Variant of create_datasource_indiv_params that takes output_query as an
# argument instead of hard-coding it (it would shadow the definition above
# if both lived in the same module).
def create_datasource_indiv_params(output_query, data_dir, indiv_params,
                                   subjects=None, sessions=None,
                                   acquisitions=None, reconstructions=None):
    """Create a datasource node that has iterables following the BIDS
    format, including an indiv_params file."""
    bids_datasource = pe.Node(interface=BIDSDataGrabberParams(indiv_params),
                              name='bids_datasource')
    bids_datasource.inputs.base_dir = data_dir
    bids_datasource.inputs.output_query = output_query

    layout = BIDSLayout(data_dir)

    # Verbose
    print("BIDS layout:", layout)
    print("\t", layout.get_subjects())
    print("\t", layout.get_sessions())

    if subjects is None:
        subjects = layout.get_subjects()
    if sessions is None:
        sessions = layout.get_sessions()

    iterables = []
    iterables.append(('subject', subjects))
    iterables.append(('session', sessions))

    if acquisitions is not None:
        iterables.append(('acquisition', acquisitions))

    if reconstructions is not None:
        iterables.append(('reconstruction', reconstructions))

    bids_datasource.iterables = iterables

    return bids_datasource
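# --- Usage sketch (not from the original source) for the output_query
# --- variant above. Paths and the empty indiv_params dict are hypothetical;
# --- BIDSDataGrabberParams is assumed to be defined in this module.
output_query = {
    "T1": {"datatype": "anat", "suffix": "T1w",
           "extensions": ["nii", ".nii.gz"]},
    "T2": {"datatype": "anat", "suffix": "T2w",
           "extensions": ["nii", ".nii.gz"]},
}
datasource = create_datasource_indiv_params(
    output_query, "/data/bids", indiv_params={},
    subjects=["01"], sessions=["01"])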
import os

from bids.layout import BIDSLayout

# Directory where your dataset resides.
dataDir = '/tmp/Data/ds114'

# Create the layout object for this BIDS dataset.
layout = BIDSLayout(dataDir)

# Subjects
subjList = layout.get_subjects()

# Sessions
sesList = layout.get_sessions()

# Tasks
taskList = layout.get_tasks()

# Runs
runList = layout.get_runs()

# List of all fMRI data for subject 01
fMRI_sub01 = layout.get(subject='01',
                        suffix='bold',
                        extension=['nii', 'nii.gz'],
                        return_type='file')

# Let's focus on the test session. (The original snippet was cut off
# mid-call; the trailing arguments below mirror the query above.)
fMRI_sub01_test = layout.get(subject='01',
                             session='test',
                             suffix='bold',
                             extension=['nii', 'nii.gz'],
                             return_type='file')
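# A further query sketch (not in the original snippet) against the same
# ds114 layout: filter on a single run and pull the JSON sidecar metadata
# for the first matching BOLD file.
fMRI_sub01_test_run1 = layout.get(subject='01', session='test', run=1,
                                  suffix='bold',
                                  extension=['nii', 'nii.gz'],
                                  return_type='file')
if fMRI_sub01_test_run1:
    bold_metadata = layout.get_metadata(fMRI_sub01_test_run1[0])
    print(bold_metadata.get('RepetitionTime'))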
from itertools import product  # used to pair subjects with their sessions


def read_bids_dataset(bids_input, subject_list=None, session_list=None,
                      collect_on_subject=False):
    """
    Extracts and organizes relevant metadata from a BIDS dataset necessary
    for the DCAN-modified HCP fMRI processing pipeline.

    :param bids_input: path to input BIDS folder
    :param subject_list: EITHER a list of subject ids to filter on, OR a
        dictionary of subject id: list of sessions to filter on.
    :param session_list: a list of session ids to filter on.
    :param collect_on_subject: collapses all sessions, for cases with
        non-longitudinal data spread across scan sessions.
    :return: BIDS data struct (nested dict)
    spec:
    {
        t1w: t1w filename list,
        t2w: t2w filename list,
        t1w_metadata: BIDS metadata (first t1),
        t2w_metadata: BIDS metadata (first t2),
        func: fMRI filename list,
        func_metadata: BIDS metadata list,
        fmap: {
            positive: spin echo filename list (if applicable),
            negative: spin echo filename list (if applicable)
        },
        fmap_metadata: {
            positive: BIDS metadata list (if applicable),
            negative: BIDS metadata list (if applicable)
        },
    }
    """
    layout = BIDSLayout(bids_input, index_metadata=True)
    subjects = layout.get_subjects()

    # filter subject list
    if isinstance(subject_list, list):
        subjects = [s for s in subjects if s in subject_list]
    elif isinstance(subject_list, dict):
        subjects = [s for s in subjects if s in subject_list.keys()]

    subsess = []
    # filter session list
    for s in subjects:
        sessions = layout.get_sessions(subject=s)
        if isinstance(session_list, list):
            sessions = [t for t in sessions if t in session_list]
        if not sessions:
            subsess += [(s, None)]
        elif collect_on_subject:
            subsess += [(s, sessions)]
        else:
            subsess += list(product([s], sessions))

    assert len(subsess), 'bids data not found for participants. If labels ' \
                         'were provided, check the participant labels for ' \
                         'errors. Otherwise check that the bids folder ' \
                         'provided is correct.'

    for subject, sessions in subsess:
        # get relevant image datatypes (set_anatomicals, set_functionals and
        # set_fieldmaps are helpers defined elsewhere in this module)
        anat, anat_types = set_anatomicals(layout, subject, sessions)
        func, func_types = set_functionals(layout, subject, sessions)
        fmap, fmap_types = set_fieldmaps(layout, subject, sessions)

        bids_data = {
            'subject': subject,
            'session': sessions if not collect_on_subject else None,
            'types': anat_types.union(func_types, fmap_types)
        }
        bids_data.update(anat)
        bids_data.update(func)
        bids_data.update(fmap)

        yield bids_data
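# --- Usage sketch (not from the original source): read_bids_dataset is a
# --- generator, so iterate it to collect per-(subject, session) dicts.
# --- "/data/bids" is a hypothetical path.
for bids_data in read_bids_dataset('/data/bids', subject_list=['01']):
    print(bids_data['subject'], bids_data['session'],
          sorted(bids_data['types']))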
def eval_multiclass_metrics_dataset(dataset_name,
                                    man_analysis_name="manual_segmentation",
                                    suffix="space-orig_desc-brain_dseg",
                                    output_dir="evaluation_results"):
    # NB: data_path and auto_analysis_names are module-level globals, and
    # compute_all_multiclass_metrics is defined elsewhere in this module.
    data_dir = os.path.join(data_path, dataset_name)

    layout = BIDSLayout(data_dir)

    # Verbose
    print("BIDS layout:", layout)

    subjects = layout.get_subjects()
    sessions = layout.get_sessions()

    print(subjects)
    print(sessions)

    res_eval_path = os.path.join(data_dir, "derivatives", output_dir)

    try:
        os.makedirs(res_eval_path)
    except OSError:
        print("res_eval_path {} already exists".format(res_eval_path))

    results = []
    for sub in subjects:

        # subjects excluded from the evaluation
        if sub in ["032139", "032141", "032142", "032143"]:
            continue

        for ses in sessions:
            print("**** Running multiclass sub {} ses {} ****".format(
                sub, ses))

            man_mask_file = os.path.join(
                data_dir, "derivatives", man_analysis_name,
                "sub-{}".format(sub), "ses-{}".format(ses), "anat",
                "sub-{}_ses-{}_{}.nii.gz".format(sub, ses, suffix))

            assert os.path.exists(man_mask_file), \
                "Error, could not find file {}".format(man_mask_file)

            for auto_analysis_name in auto_analysis_names:

                auto_mask_file = os.path.join(
                    data_dir, "derivatives", auto_analysis_name,
                    "sub-{}".format(sub), "ses-{}".format(ses), "anat",
                    "sub-{}_ses-{}_{}.nii.gz".format(sub, ses, suffix))

                assert os.path.exists(auto_mask_file), \
                    "Error, could not find file {}".format(auto_mask_file)

                eval_name = "manual-{}".format(auto_analysis_name)

                print("Comparing multiclass {} and {}".format(
                    man_analysis_name, auto_analysis_name))

                list_res = compute_all_multiclass_metrics(
                    man_mask_file, auto_mask_file)

                list_res.insert(0, eval_name)
                list_res.insert(0, ses)
                list_res.insert(0, sub)

                results.append(list_res)

    # sub, ses, eval_name, ICC, VP, FP, VN, FN, kappa, dice, JC
    df = pd.DataFrame(results,
                      columns=["Subject", "Session", "Evaluation", "ICC",
                               "VP", "FP", "VN", "FN", "Kappa", "Dice",
                               "Jaccard"])

    csv_name = "multiclass_" + dataset_name + "_eval_res.csv"
    df.to_csv(os.path.join(res_eval_path, csv_name))

    return df
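# --- Usage sketch (not from the original source). Assumes the module-level
# --- globals the function relies on (data_path, auto_analysis_names) are
# --- defined; "my_dataset" is a hypothetical dataset name.
df = eval_multiclass_metrics_dataset("my_dataset")
print(df.groupby("Evaluation")[["Dice", "Jaccard"]].mean())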
def find_data(self, subject_ids=None, visit_ids=None):
    """
    Return subject and session information for a project in the local
    repository.

    Parameters
    ----------
    subject_ids : list(str)
        List of subject IDs with which to filter the tree. If None,
        all are returned.
    visit_ids : list(str)
        List of visit IDs with which to filter the tree. If None,
        all are returned.

    Returns
    -------
    project : arcana.repository.Tree
        A hierarchical tree of subject, session and fileset information
        for the repository.
    """
    filesets = []
    layout = BIDSLayout(self.root_dir)
    all_subjects = layout.get_subjects()
    all_visits = layout.get_sessions()
    if not all_visits:
        all_visits = [self.DEFAULT_VISIT_ID]
        self._depth = 1
    else:
        self._depth = 2
    for item in layout.get(return_type='object'):
        if item.path.startswith(self.derivatives_dir):
            # We handle derivatives using the BasicRepo base class methods
            continue
        if not hasattr(item, 'entities') or not item.entities.get('suffix',
                                                                  False):
            logger.warning("Skipping unrecognised file '{}' in BIDS tree"
                           .format(op.join(item.dirname, item.filename)))
            continue  # Ignore hidden file
        # NB: use loop-local names here so the subject_ids/visit_ids
        # arguments are not clobbered before being passed to
        # super().find_data() below (the original reassigned the arguments).
        try:
            item_subject_ids = [item.entities['subject']]
        except KeyError:
            # If the item exists in the top levels of the directory
            # structure it is inferred to exist for all subjects in the tree
            item_subject_ids = all_subjects
        try:
            item_visit_ids = [item.entities['session']]
        except KeyError:
            # If the item exists in the top levels of the directory
            # structure it is inferred to exist for all visits in the tree
            item_visit_ids = all_visits
        for subject_id in item_subject_ids:
            for visit_id in item_visit_ids:
                aux_files = {}
                metadata = layout.get_metadata(item.path)
                if metadata and not item.path.endswith('.json'):
                    # Write out the combined JSON sidecars to a temporary
                    # file to include in extended NIfTI filesets
                    metadata_path = op.join(
                        self.metadata_dir,
                        'sub-{}'.format(subject_id),
                        'ses-{}'.format(visit_id),
                        item.filename + '.json')
                    os.makedirs(op.dirname(metadata_path), exist_ok=True)
                    if not op.exists(metadata_path):
                        with open(metadata_path, 'w') as f:
                            json.dump(metadata, f)
                    aux_files['json'] = metadata_path
                fileset = BidsFileset(
                    path=op.join(item.dirname, item.filename),
                    type=item.entities['suffix'],
                    subject_id=subject_id,
                    visit_id=visit_id,
                    repository=self,
                    modality=item.entities.get('modality', None),
                    task=item.entities.get('task', None),
                    aux_files=aux_files)
                filesets.append(fileset)
    # Get derived filesets, fields and records using the method in the
    # BasicRepo base class
    derived_filesets, fields, records = super().find_data(
        subject_ids=subject_ids, visit_ids=visit_ids)
    filesets.extend(derived_filesets)

    return filesets, fields, records
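# --- Usage sketch (not from the original source): find_data is a method of
# --- a repository class not shown here; `repo` stands in for a hypothetical
# --- instance of it, with root_dir pointing at a BIDS dataset.
filesets, fields, records = repo.find_data(subject_ids=['01'])
print(len(filesets), "filesets;", len(fields), "fields;",
      len(records), "records")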
def main():
    parser = makeParser()
    results = parser.parse_args()

    if results.boutiques:
        createDescriptor(parser, results)
        return 0

    verb = results.verbose
    fsldir = results.fsldir
    mni152 = op.join(fsldir, "data", "standard",
                     "MNI152_T1_2mm_brain.nii.gz")
    mni152bn = op.basename(mni152).split(".")[0]
    outdir = results.output_dir
    partis = results.participant_label
    labels = results.parcellation

    if verb:
        print("BIDS Dir: {0}".format(results.bids_dir), flush=True)
        print("Output Dir: {0}".format(results.output_dir), flush=True)
        print("Analysis level: {0}".format(results.analysis_level),
              flush=True)

    # This preprocessing workflow is modified from the FSL recommendations
    # here: https://fsl.fmrib.ox.ac.uk/fsl/fslwiki/FDT/UserGuide

    # Step 0, 1: Begin interrogation of BIDS dataset

    # Due to current super-linear slowdown in BIDS Layout, exclude all but
    # the participant of interest. Explored in the following GitHub issue:
    # https://github.com/bids-standard/pybids/issues/285
    if partis is not None:
        pattrn = 'sub-(?!{0})(.*)$'.format("|".join(partis))
    else:
        pattrn = ''

    dset = BIDSLayout(results.bids_dir, exclude=pattrn)
    subjects = dset.get_subjects()
    if results.participant_label is not None:
        subjects = [pl for pl in results.participant_label
                    if pl in subjects]
        assert (len(subjects) > 0)
    if verb:
        print("Participants: {0}".format(", ".join(s for s in subjects)),
              flush=True)

    sessions = dset.get_sessions()
    if results.session_label is not None:
        sessions = [sl for sl in results.session_label if sl in sessions]
        assert (len(sessions) > 0)
    if verb:
        print("Sessions: {0}".format(", ".join(s for s in sessions)),
              flush=True)

    # Step 0, 2: Prune dataset to subjects/sessions that have necessary files
    ftypes = [".nii", ".bval", ".bvec"]
    collections = []
    for subj in subjects:
        for sess in sessions:
            tf_dwi = dset.get(subject=subj, session=sess, datatype="dwi",
                              suffix="dwi", return_type="file")
            tf_anat = dset.get(subject=subj, session=sess, datatype="anat",
                               suffix="T1w", return_type="file")
            if (all(any(ftype in fl for fl in tf_dwi) for ftype in ftypes)
                    and any(ftypes[0] in fl for fl in tf_anat)):
                collections += [{
                    "subject": subj,
                    "session": sess,
                    "anat": [t for t in tf_anat if ftypes[0] in t][0],
                    "bval": [t for t in tf_dwi if ftypes[1] in t][0],
                    "bvec": [t for t in tf_dwi if ftypes[2] in t][0],
                    "dwi": [t for t in tf_dwi if ftypes[0] in t][0]
                }]
            else:
                if verb:
                    print("Skipping sub-{0}".format(subj) +
                          " / ses-{0} due to missing data.".format(sess),
                          flush=True)

    complete_collection = []
    for col in collections:
        dwibn = op.basename(col["dwi"]).split('.')[0]
        anatbn = op.basename(col["anat"]).split('.')[0]
        subses = op.join('sub-{0}'.format(col['subject']),
                         'ses-{0}'.format(col['session']))

        derivdir_d = op.join(outdir, subses, "dwi")
        derivdir_a = op.join(outdir, subses, "anat")
        execute("mkdir -p {0}".format(derivdir_d), verbose=verb,
                skipif=op.isdir(derivdir_d))
        execute("mkdir -p {0}".format(derivdir_a), verbose=verb,
                skipif=op.isdir(derivdir_a))

        # Step 1: Extract B0 volumes
        # Make even number of spatial voxels? (req'd for eddy for some
        # reason)
        # TODO: above, if actually needed - docs inconsistent

        # Get B0 locations
        with open(col["bval"]) as fhandle:
            bvals = fhandle.read().split(" ")
            bvals = [int(b) for b in bvals if b != '' and b != '\n']
            b0_loc = [i for i, b in enumerate(bvals) if b == np.min(bvals)]

        # Get B0 volumes
        col["b0_scans"] = []
        for idx, b0 in enumerate(b0_loc):
            b0ind = "b0_{0}".format(idx)
            col["b0_scans"] += [
                op.join(derivdir_d, dwibn + "_" + b0ind + ".nii.gz")
            ]
            execute(fsl.fslroi(col["dwi"], col["b0_scans"][-1], *[b0, 1]),
                    verbose=verb, skipif=op.isfile(col["b0_scans"][-1]))

        # Merge B0 volumes
        col["b0s"] = op.join(derivdir_d, dwibn + "_b0s.nii.gz")
        execute(fsl.fslmerge(col["b0s"], *col["b0_scans"]),
                verbose=verb, skipif=op.isfile(col["b0s"]))

        # Create acquisition parameters file
        col["acqparams"] = op.join(derivdir_d, dwibn + "_acq.txt")
        acqs = {"i": "1 0 0", "i-": "-1 0 0",
                "j": "0 1 0", "j-": "0 -1 0",
                "k": "0 0 1", "k-": "0 0 -1"}
        with open(col["acqparams"], 'w') as fhandle:
            meta = dset.get_metadata(path=col["dwi"])
            pedir = meta["PhaseEncodingDirection"]
            trout = meta["TotalReadoutTime"]
            line = "{0} {1}".format(acqs[pedir], trout)
            fhandle.write("\n".join([line] * len(b0_loc)))

        # Step 1.5: Run top-up on diffusion data
        # TODO: remove; topup only applies with multiple PEs (rare in open
        # data)
        # col["topup"] = op.join(derivdir_d, dwibn + "_topup")
        # col["hifi_b0"] = op.join(derivdir_d, dwibn + "_hifi_b0")
        # execute(fsl.topup(col["b0s"], col["acqparams"],
        #                   col["topup"], col["hifi_b0"]),
        #         verbose=verb)
        # execute(fsl.fslmaths(col["hifi_b0"], "-Tmean", col["hifi_b0"]),
        #         verbose=verb)

        # Step 2: Brain extraction
        # ... Diffusion:
        col["dwi_brain"] = op.join(derivdir_d, dwibn + "_brain.nii.gz")
        col["dwi_mask"] = op.join(derivdir_d, dwibn + "_brain_mask.nii.gz")
        execute(fsl.bet(col["dwi"], col["dwi_brain"], "-F", "-m"),
                verbose=verb, skipif=op.isfile(col["dwi_brain"]))

        # ... Structural:
        col["anat_brain"] = op.join(derivdir_a, anatbn + "_brain.nii.gz")
        # NB: the original assigned the "_brain.nii.gz" path to both keys;
        # the mask produced by bet -m is "_brain_mask.nii.gz".
        col["anat_mask"] = op.join(derivdir_a,
                                   anatbn + "_brain_mask.nii.gz")
        execute(fsl.bet(col["anat"], col["anat_brain"], "-m"),
                verbose=verb, skipif=op.isfile(col["anat_brain"]))

        # Step 3: Produce preliminary DTIfit QC figures
        col["dwi_qc_pre"] = op.join(derivdir_d, dwibn + "_dtifit_pre")
        execute(fsl.dtifit(col["dwi_brain"], col["dwi_qc_pre"],
                           col["dwi_mask"], col["bvec"], col["bval"]),
                verbose=verb,
                skipif=op.isfile(col["dwi_qc_pre"] + "_FA.nii.gz"))

        # Step 4: Perform eddy correction
        # ... Create index
        col["index"] = op.join(derivdir_d, dwibn + "_eddy_index.txt")
        with open(col["index"], 'w') as fhandle:
            fhandle.write(" ".join(["1"] * len(bvals)))

        # ... Run eddy
        col["eddy_dwi"] = op.join(derivdir_d, dwibn + "_eddy")
        if results.gpu:
            eddy_exe = "eddy_cuda8.0"
        else:
            eddy_exe = "eddy_openmp"
        execute(fsl.eddy(col["dwi_brain"], col["dwi_mask"],
                         col["acqparams"], col["index"], col["bvec"],
                         col["bval"], col["eddy_dwi"], exe=eddy_exe),
                verbose=verb,
                skipif=op.isfile(col["eddy_dwi"] + ".nii.gz"))

        # Step 5: Registration to template
        # ... Compute transforms
        col["t1w2mni"] = op.join(derivdir_a, anatbn + "_to_mni_xfm.mat")
        execute(fsl.flirt(col["anat_brain"], omat=col["t1w2mni"],
                          ref=mni152),
                verbose=verb, skipif=op.isfile(col["t1w2mni"]))

        col["dwi2t1w"] = op.join(derivdir_d, dwibn + "_to_t1w_xfm.mat")
        execute(fsl.flirt(col["eddy_dwi"], ref=col["anat_brain"],
                          omat=col["dwi2t1w"]),
                verbose=verb, skipif=op.isfile(col["dwi2t1w"]))

        col["dwi2mni"] = op.join(derivdir_d, dwibn + "_to_mni_xfm.mat")
        execute(fsl.convert_xfm(concat=col["t1w2mni"], inp=col["dwi2t1w"],
                                omat=col["dwi2mni"]),
                verbose=verb, skipif=op.isfile(col["dwi2mni"]))

        # ... Invert transforms towards diffusion space
        col["mni2dwi"] = op.join(derivdir_d, dwibn + "_from_mni_xfm.mat")
        execute(fsl.convert_xfm(inverse=col["dwi2mni"],
                                omat=col["mni2dwi"]),
                verbose=verb, skipif=op.isfile(col["mni2dwi"]))

        col["t1w2dwi"] = op.join(derivdir_a, anatbn + "_dwi_xfm.mat")
        execute(fsl.convert_xfm(inverse=col["dwi2t1w"],
                                omat=col["t1w2dwi"]),
                verbose=verb, skipif=op.isfile(col["t1w2dwi"]))

        # Step 6: Apply registrations to anatomical and template images
        col["anat_in_dwi"] = op.join(derivdir_a,
                                     anatbn + "_brain_dwi.nii.gz")
        execute(fsl.flirt(col["anat_brain"], applyxfm=True,
                          out=col["anat_in_dwi"], init=col["t1w2dwi"],
                          ref=col["eddy_dwi"]),
                verbose=verb, skipif=op.isfile(col["anat_in_dwi"]))

        col["mni_in_dwi"] = op.join(
            derivdir_d, ("atlas_" + dwibn + "_" + mni152bn + "_dwi.nii.gz"))
        execute(fsl.flirt(mni152, applyxfm=True, out=col["mni_in_dwi"],
                          init=col["mni2dwi"], ref=col["eddy_dwi"]),
                verbose=verb, skipif=op.isfile(col["mni_in_dwi"]))

        # Step 7: Perform tissue segmentation on anatomical images in DWI
        # space
        col["tissue_masks"] = op.join(derivdir_d, anatbn + "_fast")
        execute(fsl.fast(col["anat_in_dwi"], col["tissue_masks"],
                         classes=3, imtype=1),
                verbose=verb,
                skipif=op.isfile(col["tissue_masks"] + "_seg_2.nii.gz"))

        # Step 8: Transform parcellations into DWI space
        col["labels_in_dwi"] = []
        for label in labels:
            lbn = op.basename(label).split('.')[0]
            col["labels_in_dwi"] += [
                op.join(derivdir_d,
                        ("labels_" + dwibn + "_" + lbn + ".nii.gz"))
            ]
            execute(fsl.flirt(label, applyxfm=True,
                              out=col["labels_in_dwi"][-1],
                              init=col["mni2dwi"], ref=col["eddy_dwi"],
                              interp="nearestneighbour"),
                    verbose=verb,
                    skipif=op.isfile(col["labels_in_dwi"][-1]))

        if verb:
            # (use the collection's own subject/session here, not the stale
            # loop variables from the pruning step above)
            print("Finished processing sub-{0}".format(col["subject"]) +
                  " / ses-{0} !".format(col["session"]), flush=True)
        complete_collection += [col]
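# --- Entry-point sketch (not from the original source): a conventional
# --- guard for running the pipeline above as a script (makeParser, execute
# --- and the fsl wrapper module are defined elsewhere in this script).
if __name__ == "__main__":
    import sys
    sys.exit(main())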
def eval_monoclass_metrics_dataset(dataset_name,
                                   man_analysis_name="manual_segmentation",
                                   suffix="space-orig_desc-brain_mask",
                                   output_dir="evaluation_results"):
    # NB: bids_path, data_path, subjects, sessions and auto_analysis_names
    # are module-level globals, and compute_all_monoclass_metrics is defined
    # elsewhere in this module.
    bids_dir = os.path.join(bids_path)

    # layout = BIDSLayout(bids_dir)  # plain indexing was very long
    # Cache the layout in an SQLite database instead
    # (from https://github.com/bids-standard/pybids/pull/523)
    layout = BIDSLayout(
        bids_dir,
        database_file="bidsdb.sql",
    )

    # Verbose
    print("BIDS layout:", layout)

    all_subjects = layout.get_subjects()
    all_sessions = layout.get_sessions()

    print(all_subjects)
    print(all_sessions)

    data_dir = os.path.join(data_path, dataset_name)
    res_eval_path = os.path.join(data_dir, "derivatives", output_dir)

    try:
        os.makedirs(res_eval_path)
    except OSError:
        print("res_eval_path {} already exists".format(res_eval_path))

    results = []
    for sub in subjects:
        assert sub in all_subjects, \
            "Error, subject {} was not found in bids dir {}".format(
                sub, all_subjects)

        for ses in sessions:
            assert ses in all_sessions, \
                "Error, session {} was not found in bids dir {}".format(
                    ses, all_sessions)

            print("**** Running monoclass sub {} ses {} ****".format(
                sub, ses))

            man_mask_file = os.path.join(
                data_dir, "derivatives", man_analysis_name,
                "sub-{}".format(sub), "ses-{}".format(ses), "anat",
                "sub-{}_ses-{}_{}.nii.gz".format(sub, ses, suffix))

            assert os.path.exists(man_mask_file), \
                "Error, could not find file {}".format(man_mask_file)

            for auto_analysis_name in auto_analysis_names:

                auto_mask_file = os.path.join(
                    data_dir, "derivatives", auto_analysis_name,
                    "sub-{}".format(sub), "ses-{}".format(ses), "anat",
                    "sub-{}_ses-{}_{}.nii.gz".format(sub, ses, suffix))

                assert os.path.exists(auto_mask_file), \
                    "Error, could not find file {}".format(auto_mask_file)

                eval_name = "manual-{}".format(auto_analysis_name)

                print("Comparing monoclass {} and {}".format(
                    man_analysis_name, auto_analysis_name))

                list_res = compute_all_monoclass_metrics(
                    man_mask_file, auto_mask_file,
                    pref=os.path.join(
                        res_eval_path,
                        sub + "_" + ses + "_" + eval_name + "_"))

                list_res.insert(0, eval_name)
                list_res.insert(0, ses)
                list_res.insert(0, sub)

                results.append(list_res)

    # sub, ses, eval_name, VP, FP, VN, FN, kappa, dice, JC, LCE, GCE
    df = pd.DataFrame(results,
                      columns=["Subject", "Session", "Evaluation", "VP",
                               "FP", "VN", "FN", "Kappa", "Dice", "Jaccard",
                               "LCE", "GCE"])

    csv_name = "monoclass_" + dataset_name + "_eval_res.csv"
    df.to_csv(os.path.join(res_eval_path, csv_name))

    return df
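# --- Usage sketch (not from the original source). Assumes the module-level
# --- globals used above (bids_path, data_path, subjects, sessions,
# --- auto_analysis_names) are defined; "my_dataset" is hypothetical.
df = eval_monoclass_metrics_dataset("my_dataset")
print(df[["Subject", "Session", "Evaluation", "Dice"]].head())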