def calc_demos(output_dir, info_out_dir, demo_file, pwd, new_id_lut_file=None):
    """Calculate subject demographics (sex, age at scan) and export participants.tsv.

    Reads the password-protected demographics file, maps private subject ids
    to public ones, merges with per-session acquisition dates, computes age
    at acquisition, and writes ``participants.tsv`` into *output_dir*.

    Parameters
    ----------
    output_dir : pathlib.Path
        Output directory (``/`` operator is used, so a Path is expected).
    info_out_dir :
        Passed to ``get_acq_dates`` to locate acquisition dates.
    demo_file : str
        Password-protected container holding ``demos.txt``.
    pwd : str
        Password for *demo_file*; must be non-empty.
    new_id_lut_file : str, optional
        Lookup table mapping old subject ids to public ids.

    Raises
    ------
    ValueError
        If *pwd* is empty (explicit raise instead of ``assert``, which is
        stripped under ``python -O``).
    """
    if not pwd:
        raise ValueError("password empty")

    # get acq_dates
    acq_dates = get_acq_dates(info_out_dir)

    demo_df = read_protected_file(
        demo_file, pwd, "demos.txt").reset_index().rename(
            columns={"subject_id": "old_participant_id"})
    demo_df["participant_id"] = get_public_sub_id(demo_df.old_participant_id,
                                                  new_id_lut_file)
    demo_df["participant_id"] = "sub-" + demo_df["participant_id"]
    demo_df = demo_df.drop(columns=["old_participant_id"])

    df = pd.merge(acq_dates, demo_df, how="left", on="participant_id")
    df["dob"] = pd.to_datetime(df.dob)
    df["acq_date"] = pd.to_datetime(df.acq_date)
    # age in years at acquisition; 365.25 accounts for leap years.
    # Series.round(1) replaces the roundabout .apply(np.round, decimals=1).
    df["age"] = ((df.acq_date - df.dob).dt.days / 365.25).round(1)
    df = df[["participant_id", "session_id", "sex", "age"]]

    to_tsv(df, output_dir / "participants.tsv")

    subjects = df.participant_id.unique()
    print(
        f"\n\n\n\nDONE.\nExported demos for {len(subjects)} subjects.\n {subjects}"
    )
def update_sub_scans_file(output_dir,
                          bids_sub,
                          bids_ses,
                          bids_modality,
                          out_filename,
                          par_file,
                          public_output=True):
    """Append one scan entry to the per-subject ``*_scans.tsv`` file.

    One file per subject with
    - ses id
    - filename
    - if not public: date of acquisition

    Parameters
    ----------
    output_dir : str
        BIDS root directory containing the subject folder.
    bids_sub, bids_ses, bids_modality : str
        BIDS entities used to build the relative filename.
    out_filename : str
        Output file base name (``.nii.gz`` suffix is appended).
    par_file : str
        PAR header file; acquisition time is parsed from it.
    public_output : bool, optional
        When True (default) the ``acq_time`` column is dropped from the
        exported table to avoid leaking acquisition dates.
    """
    general_info, image_defs = read_par(par_file)
    acq_time = parse_acq_time(general_info)

    scans_file = os.path.join(output_dir, bids_sub, bids_sub + "_scans.tsv")
    if os.path.exists(scans_file):
        scans = read_tsv(scans_file)
    else:
        scans = pd.DataFrame([])

    # DataFrame.append was removed in pandas 2.0; use pd.concat instead.
    new_row = pd.DataFrame([{
        "participant_id": bids_sub,
        "session_id": bids_ses,
        "filename": bids_ses + "/" + bids_modality + "/" + out_filename +
                    ".nii.gz",
        "acq_time": acq_time
    }])
    scans = pd.concat([scans, new_row], ignore_index=True)

    if not public_output:
        scans = scans[["participant_id", "session_id", "filename", "acq_time"]]
    else:
        scans = scans[["participant_id", "session_id", "filename"]]
    to_tsv(scans, scans_file)
def save_physio(output_filename_base, meta_data, physio_data):
    """Write physio recordings as a gzipped TSV plus a JSON sidecar.

    The sidecar carries the column names and fixed StartTime /
    SamplingFrequency values (496 Hz hardcoded here).

    NOTE(review): *meta_data* is unused in this variant, and a later
    redefinition of ``save_physio`` in this file shadows this one —
    confirm which definition is intended.
    """
    tsv_filename = output_filename_base + ".tsv.gz"
    json_filename = output_filename_base + ".json"
    sidecar = {
        "Columns": physio_data.columns.tolist(),
        "StartTime": 0,
        "SamplingFrequency": 496,
    }
    add_info_to_json(json_filename, sidecar, create_new=True)
    to_tsv(physio_data, tsv_filename, header=False)
def save_physio(output_filename_base, meta_data, physio_data):
    """Write physio recordings as a plain TSV plus a JSON sidecar.

    The sidecar stores the column header and the supplied *meta_data*.

    NOTE(review): this redefinition shadows an earlier ``save_physio``
    (the ``.tsv.gz`` variant) in this file — confirm that only this
    version is wanted.
    """
    tsv_filename = output_filename_base + ".tsv"
    json_filename = output_filename_base + ".json"
    sidecar = {
        "header": physio_data.columns.tolist(),
        "meta_data": meta_data,
    }
    add_info_to_json(json_filename, sidecar, create_new=True)
    to_tsv(physio_data, tsv_filename, header=False)
def reduce_sub_files(bids_dir, output_file, sub_file):
    """Concatenate all per-subject files matching *sub_file* into one TSV.

    Parameters
    ----------
    bids_dir : str
        BIDS root scanned via BIDSLayout.
    output_file : str
        Name of the combined TSV, written inside *bids_dir*.
    sub_file : str
        Extension/suffix passed to ``layout.get(extensions=...)``.
    """
    layout = BIDSLayout(bids_dir)
    files = layout.get(extensions=sub_file)

    # Collect frames first and concat once: repeated pd.concat in a loop
    # copies the accumulated frame every iteration (O(n^2)).
    frames = []
    for file in [f.filename for f in files]:
        print(file)
        frames.append(read_tsv(file))

    df = pd.concat(frames) if frames else pd.DataFrame([])
    to_tsv(df, os.path.join(bids_dir, output_file))
def get_scan_duration(output_dir, modality="func", task="rest"):
    """Collect ScanDurationSec per subject/session and write it to a TSV.

    Scans *output_dir* with BIDSLayout, reads the ``ScanDurationSec`` JSON
    metadata of the single matching nii.gz per session, and writes
    ``scan_duration_<modality>[_<task>].tsv`` into *output_dir*.

    Raises
    ------
    Exception
        If more than one file matches a subject/session/modality/task.
    """
    layout = BIDSLayout(output_dir)
    subjects_list = layout.get_subjects()

    # Build rows in a list; DataFrame.append was removed in pandas 2.0.
    rows = []
    for sub_id in subjects_list:
        ses_id_list = layout.get_sessions(subject=sub_id)
        for ses_id in ses_id_list:
            f = layout.get(subject=sub_id, session=ses_id, modality=modality,
                           task=task, extensions='.nii.gz')
            if len(f) > 1:
                raise Exception(
                    "something went wrong, more than one %s %s file detected: %s"
                    % (modality, task, f))
            elif len(f) == 1:
                duration = (layout.get_metadata(
                    f[0].filename)["ScanDurationSec"])
                # NOTE(review): "sesssion_id" typo kept on purpose — existing
                # consumers of this TSV may rely on the column name. TODO fix
                # in a coordinated change.
                rows.append(
                    pd.DataFrame(
                        OrderedDict([("subject_id", sub_id),
                                     ("sesssion_id", ses_id),
                                     ("scan_duration_s", [duration])])))
    scan_duration = pd.concat(rows) if rows else pd.DataFrame([])

    out_str = modality
    if task:
        out_str += "_" + task
    output_file = os.path.join(output_dir, "scan_duration_%s.tsv" % out_str)
    print("Writing scan duration to %s" % output_file)
    to_tsv(scan_duration, output_file)
# ---- build the raw subject list from the per-session source folders ----
raw_subjects_list = []
for ses in ses_id_list:
    search_dir = os.path.join(raw_dir, ses, in_ses_folder)
    os.chdir(search_dir)
    raw_subjects_list.extend(sorted(glob.glob("lhab*")))

# the first 9 characters identify the subject; deduplicate and sort
old_subject_id_list = sorted({s[:9] for s in raw_subjects_list})

# ---- export all-subjects list (refuse to overwrite an existing export) ----
all_sub_file = os.path.join(output_dir, "lhab_all_subjects.tsv")
if os.path.exists(all_sub_file):
    raise FileExistsError("\n%s exists. Stopped and did nothing." %
                          all_sub_file)

all_sub = pd.DataFrame({"sub_id": old_subject_id_list},
                       index=old_subject_id_list)
to_tsv(all_sub, all_sub_file)
print("All-subject list written to %s" % all_sub_file)
# freeze the exported list so it cannot be clobbered accidentally
os.chmod(all_sub_file, 0o444)
print("Permission of %s set to read only\n" % all_sub_file)

# ---- create the pool of new (pseudonymous) ids: lhabX0001..lhabX0999 ----
new_id = ['lhabX{:04d}'.format(i) for i in range(1, 1000)]

# for good measures, shuffle the ids that actually get assigned
n_sub = len(old_subject_id_list)
new_id_used = new_id[:n_sub]
random.shuffle(new_id_used)
new_id_spare = new_id[n_sub:]
def calc_demos(
        output_dir,
        ses_id_list,
        raw_dir,
        in_ses_folder,
        demo_file,
        pwd,
        use_new_ids=True,
        new_id_lut_file=None,
        public_output=True,
):
    """Calculate demographics from acq_time and export participant tables.

    Walks the raw session folders for every subject found in *output_dir*,
    fetches demographics per subject/session from the password-protected
    *demo_file*, and writes ``participants.tsv`` (plus ``acq_time.tsv`` for
    non-public exports) into *output_dir*.

    Parameters
    ----------
    output_dir : str
        BIDS directory; scanned with BIDSLayout for the subject list.
    ses_id_list : iterable of str
        Old session ids to look up under *raw_dir*.
    raw_dir, in_ses_folder : str
        Location of the raw per-session data.
    demo_file : str
        Password-protected container holding ``demos.txt``.
    pwd : str
        Password; must be non-empty.
    use_new_ids : bool, optional
        Use the public id as BIDS subject label (default True).
    new_id_lut_file : str, optional
        Lookup table mapping public ids back to private ids.
    public_output : bool, optional
        When False, additionally export acquisition times.

    Raises
    ------
    ValueError
        If *pwd* is empty or more than one raw folder matches a subject
        (explicit raises instead of ``assert``, which is stripped under
        ``python -O``).
    """
    if not pwd:
        raise ValueError("password empty")
    demo_df = read_protected_file(demo_file, pwd, "demos.txt")

    layout = BIDSLayout(output_dir)
    new_sub_id_list = layout.get_subjects()

    # Accumulate frames and concat once at the end; pd.concat inside the
    # loop copies the whole accumulated frame every iteration (O(n^2)).
    demo_frames = []
    acq_time_frames = []

    for new_subject_id in new_sub_id_list:
        old_subject_id = get_private_sub_id(new_subject_id, new_id_lut_file)

        for old_ses_id in ses_id_list:
            subject_ses_folder = os.path.join(raw_dir, old_ses_id,
                                              in_ses_folder)
            os.chdir(subject_ses_folder)
            subject_folder = sorted(glob(old_subject_id + "*"))
            if len(subject_folder) >= 2:
                raise ValueError("more than one subject folder %s" %
                                 old_subject_id)

            if subject_folder:
                subject_folder = subject_folder[0]
                abs_subject_folder = os.path.abspath(subject_folder)
                os.chdir(abs_subject_folder)

                if use_new_ids:
                    bids_sub = new_subject_id
                else:
                    bids_sub = get_clean_subject_id(old_subject_id)
                bids_ses = get_clean_ses_id(old_ses_id)

                # demographics are only fetched when a .par header exists
                par_file_list = glob(os.path.join(abs_subject_folder, "*.par"))
                if par_file_list:
                    par_file = par_file_list[0]
                    df_subject, df_acq_time_subject = fetch_demos(
                        demo_df, old_subject_id, bids_sub, bids_ses, par_file)
                    demo_frames.append(df_subject)
                    acq_time_frames.append(df_acq_time_subject)

    out_demo_df = pd.concat(demo_frames) if demo_frames else pd.DataFrame([])
    out_acq_time_df = (pd.concat(acq_time_frames)
                       if acq_time_frames else pd.DataFrame([]))

    to_tsv(out_demo_df, os.path.join(output_dir, "participants.tsv"))
    if not public_output:
        # acquisition dates are identifying — only export privately
        to_tsv(out_acq_time_df, os.path.join(output_dir, "acq_time.tsv"))

    print("\n\n\n\nDONE.\nExported demos for %d subjects." %
          len(new_sub_id_list))
    print(new_sub_id_list)
def compare_par_nii(output_dir, old_sub_id_list, raw_dir, ses_id_list,
                    in_ses_folder, info_list, new_id_lut_file):
    """Cross-check par and nifti file counts and export the tally.

    - Checks that all subjects from subject list are in sourcedata
    - Checks that par and nii filecount agrees
    - Exports nii filecount to output_dir (``n_files.tsv``)

    Parameters
    ----------
    output_dir : str
        BIDS directory with converted nifti data.
    old_sub_id_list : iterable of str
        Private subject ids expected to be present.
    raw_dir, ses_id_list, in_ses_folder :
        Location and session ids of the raw .par source data.
    info_list : list of dict
        Per-sequence search info: ``search_str``, ``bids_modality``,
        ``bids_name``; optional ``acq``, ``direction``, ``physio``.
    new_id_lut_file : str
        Lookup table between private and public subject ids.

    Raises
    ------
    Exception
        If a subject folder is missing or a par/nii count mismatch is found.
    """
    # first check that all subjects from id list are in the output_dir
    print("\nchecking that all subjects from id list are in the output_dir...")
    layout = BIDSLayout(output_dir)
    subjects_list = layout.get_subjects()

    for old_sub_id in old_sub_id_list:
        new_sub_id = get_public_sub_id(old_sub_id, new_id_lut_file)
        sub_dir = os.path.join(output_dir, "sub-" + new_sub_id)
        if not glob(sub_dir):
            # message fixed: original read "No folder not found" (double negative)
            raise Exception("Folder not found: %s" % sub_dir)
    print("%d subjects from list found in folder %s. Seems OK...\n" %
          (len(old_sub_id_list), output_dir))

    # compare filecount of par and nii files and export
    # (rows collected in a list; DataFrame.append was removed in pandas 2.0)
    rows = []
    for new_sub_id in subjects_list:
        old_sub_id = get_private_sub_id(new_sub_id, new_id_lut_file)
        for old_ses_id in ses_id_list:
            new_ses_id = "tp" + old_ses_id[-1]
            sub_ses_par_dir = os.path.join(
                raw_dir, old_ses_id, in_ses_folder,
                old_sub_id + "_t%s_raw" % new_ses_id[-1])
            sub_ses_nii_dir = os.path.join(output_dir, "sub-" + new_sub_id,
                                           "ses-" + new_ses_id)
            n_files = OrderedDict([("subject_id", new_sub_id),
                                   ("session_id", new_ses_id)])

            for info in info_list:
                par_search_str = os.path.join(
                    sub_ses_par_dir, "*" + info["search_str"] + "*.par")
                n_files_par = len(glob(par_search_str))

                acq_str = "_acq-" + info["acq"] if "acq" in info else ""
                dir_str = "_dir-" + info["direction"] \
                    if "direction" in info else ""

                nii_search_str = os.path.join(
                    sub_ses_nii_dir, info["bids_modality"],
                    "*" + acq_str + "*" + dir_str + "*" + info["bids_name"] +
                    "*.nii.gz")
                n_files_nifti = len(glob(nii_search_str))

                c = info["bids_modality"] + "_" + info["bids_name"] + \
                    acq_str.replace("-", "") + dir_str.replace("-", "")
                n_files[c] = [n_files_nifti]

                if not n_files_par == n_files_nifti:
                    raise Exception(
                        "missmatch between par and nii file count %s %s %s %s"
                        % (new_sub_id, new_ses_id, par_search_str,
                           nii_search_str))

                # physio recordings, when expected for this sequence
                if "physio" in info.keys() and info["physio"]:
                    phys_par_search_str = os.path.join(
                        sub_ses_par_dir,
                        "*" + info["search_str"] + "*_physio.log")
                    phys_n_files_par = len(glob(phys_par_search_str))

                    phys_nii_search_str = os.path.join(
                        sub_ses_nii_dir, info["bids_modality"],
                        "*" + acq_str + "*" + dir_str + "*" +
                        info["bids_name"] + "*_physio.tsv")
                    phys_n_files_nifti = len(glob(phys_nii_search_str))

                    c = info["bids_modality"] + "_" + info["bids_name"] + \
                        acq_str.replace("-", "") + dir_str.replace("-", "") + \
                        "_physio"
                    n_files[c] = [phys_n_files_nifti]

                    if not phys_n_files_par == phys_n_files_nifti:
                        raise Exception(
                            "missmatch between par and nii file count %s %s %s %s"
                            % (new_sub_id, new_ses_id, phys_par_search_str,
                               phys_nii_search_str))

            rows.append(pd.DataFrame(n_files))

    filecount = pd.concat(rows) if rows else pd.DataFrame([])
    output_file = os.path.join(output_dir, "n_files.tsv")
    to_tsv(filecount, output_file)
    print("Compared filecount from par and nifti files. Seems OK...")
    print("Filecount written to %s" % output_file)