def convert_t2pd_image(dicom_dir, tmp_output_dir, tmp_filename, target_path_t2, target_path_pd):
    """Split an interleaved T2/PD acquisition into two separate NIFTI images.

    Uses dcm2niix (https://github.com/rordenlab/dcm2niix), which must be
    installed, e.g. `brew install dcm2niix`. dcm2niix emits one file per echo;
    `<name>_e2.nii` is moved to `target_path_t2` and `<name>_e1.nii` to
    `target_path_pd` (presumably e1=PD, e2=T2 for this protocol — TODO confirm).

    Parameters:
        dicom_dir: input DICOM folder (spaces pre-escaped by the caller).
        tmp_output_dir: scratch folder where dcm2niix writes its output.
        tmp_filename: base name passed to dcm2niix via `-f`.
        target_path_t2: final location of the T2-weighted image.
        target_path_pd: final location of the PD-weighted image.
    """
    # file processing
    dcm2niix_cmd = "dcm2niix -d 0 -f %s -o %s %s" % (tmp_filename, tmp_output_dir, dicom_dir)
    args = shlex.split(dcm2niix_cmd)
    process = subprocess.Popen(args, env=config.path_environment)
    process.wait()
    print("dcm2niix_cmd terminated with return code: '%s'" % process.returncode)
    # file management
    if process.returncode == 0:
        tmp_path_to_T2w = os.path.join(tmp_output_dir, tmp_filename + '_e2.nii')
        if os.path.exists(tmp_path_to_T2w):
            # NOTE(review): ensure_dir_exists receives the full file path,
            # not its dirname as elsewhere in this file — confirm intended.
            gt.ensure_dir_exists(target_path_t2)
            shutil.move(tmp_path_to_T2w, target_path_t2)
            print("- copied T2w image: %s -> %s" % (tmp_path_to_T2w, target_path_t2))
        tmp_path_to_PDw = os.path.join(tmp_output_dir, tmp_filename + '_e1.nii')
        if os.path.exists(tmp_path_to_PDw):
            gt.ensure_dir_exists(target_path_pd)
            shutil.move(tmp_path_to_PDw, target_path_pd)
            # BUG FIX: this log line previously said "T2w" for the PD image
            print("- copied PDw image: %s -> %s" % (tmp_path_to_PDw, target_path_pd))
def create_path(self, path_pattern_list=None, abs_path=True, create=True, **kwargs):
    """Build a file path from BIDS entities given as keyword arguments.

    Parameters:
        path_pattern_list: optional explicit path patterns forwarded to
            `bids_layout.build_path`; when falsy, the layout's defaults apply.
        abs_path: prefix the result with `self.data_root` when True.
        create: create the parent directory of the resulting path when True.
        **kwargs: BIDS entities (subject, session, modality, ...).

    Returns:
        The constructed (relative or absolute) path string.
    """
    build_args = (kwargs, path_pattern_list) if path_pattern_list else (kwargs,)
    result = self.bids_layout.build_path(*build_args)
    if abs_path:
        result = os.path.join(self.data_root, result)
    if create:
        gt.ensure_dir_exists(os.path.dirname(result))
    return result
def write_vtk_data(_data, _path_to_file):
    """Write a VTK dataset to an XML file at `_path_to_file`.

    Ensures the target location exists first, then serializes `_data`
    with `vtkXMLDataSetWriter`, handling the VTK 5 -> 6 pipeline API change.
    """
    fu.ensure_dir_exists(_path_to_file)
    writer = vtk.vtkXMLDataSetWriter()
    # VTK 6 replaced SetInput() with SetInputData()
    attach = writer.SetInput if vtk.VTK_MAJOR_VERSION <= 5 else writer.SetInputData
    attach(_data)
    writer.SetFileName(_path_to_file)
    writer.Update()
    writer.Write()
def copy_files(self, new_base_dir='.', overwrite=False, file_type='any', mode='copy', **kwargs ): """ copies file in new directory tree Try to replace by bids_layout's copy_files function in the future. """ #== retrieve query results file_list = self.bids_layout.get(**kwargs) #== select for reg/no-reg if file_type == 'reg': file_list = [file.filename for file in file_list if hasattr(file, 'registration')] elif file_type == 'noreg': file_list = [file.filename for file in file_list if (not hasattr(file, 'registration'))] else: file_list = [file.filename for file in file_list] if not file_type=='noreg': print("=== WARNING: directory structure for registered files is not handled correctly -- verify results!") #== generate new paths for old_path in file_list: new_path_rel = self.bids_layout.build_path(old_path, self.bids_layout.path_patterns) new_path_abs = os.path.join(new_base_dir, new_path_rel) gt.ensure_dir_exists(new_path_abs) if mode=='copy': print("Preparing to copy '%s' to '%s'"%(old_path, new_path_abs)) elif mode=='move': print("Preparing to move '%s' to '%s'" % (old_path, new_path_abs)) if os.path.exists(new_path_abs): if overwrite: os.remove(new_path_abs) shutil.copy(old_path, new_path_abs) else: print("File '%s' already exists ... skipping."%(new_path_abs)) else: shutil.copy(old_path, new_path_abs) if mode=='move': os.remove(old_path) try: os.rmdir(old_path) except: pass if not file_type=='noreg': print("=== WARNING: directory structure for registered files is not handled correctly -- verify results!")
def plot_segmentation_volumes(df, subject_ids=None, plot_selection=None, out_dir=None, show=True):
    """Plot per-session segmentation volumes as bar charts, one per subject.

    Parameters:
        df: DataFrame indexed by subject_id (and session) whose columns are
            label volumes in mm^3, e.g. 'bratumia_Edema'.
        subject_ids: subjects to plot; defaults to all subjects found in `df`.
        plot_selection: label names (after prefix stripping) to include;
            defaults to ['Edema', 'EnhancingTumor'].
        out_dir: if given, each figure is saved there as
            'UPN-<subject>_segmentation_volumes.png'.
        show: display each figure interactively when True, otherwise clear it.
    """
    # BUG FIX: avoid a mutable default argument for plot_selection
    if plot_selection is None:
        plot_selection = ['Edema', 'EnhancingTumor']
    # Remove 'bratumia_' from labels.
    # BUG FIX: rename on a copy instead of assigning df.columns in place,
    # which mutated the caller's DataFrame.
    df = df.rename(columns={col: col.split("_")[-1] for col in df.columns})
    # Convert units: mm^3 -> cm^3=ml
    df = df[plot_selection] / 1000.
    if subject_ids is None:
        subject_ids = df.reset_index().subject_id.unique()
    for subject_id in subject_ids:
        sel = df.loc[subject_id]
        # create plot
        fig, ax = plt.subplots(figsize=(6, 4))
        sel.plot(kind='bar', ax=ax)
        leg = ax.legend(loc='upper center', frameon=True, ncol=2, borderaxespad=0.5)
        leg.get_frame().set_linewidth(0.0)
        ax.set_ylabel("Volume [cm$^3$]")
        ax.set_xlabel("")
        plt.xticks(rotation=45)
        ax.set_title("Patient UPN %s" % subject_id)
        # leave 20% headroom above the tallest bar
        max_value = sel.max().max()
        ax.set_ylim(0, max_value + max_value * 0.2)
        if out_dir is not None:
            gt.ensure_dir_exists(out_dir)
            save_name = "UPN-%s_segmentation_volumes.png" % (subject_id)
            fig.savefig(os.path.join(out_dir, save_name), bbox_inches='tight')
        if show:
            plt.show()
        else:
            plt.clf()
def convert_dcm_folder(path_to_dcm_folder, path_to_output_folder, file_name,
                       export_dcm=False, anonymise=False, overwrite=False,
                       t2pd=False):
    """Convert a DICOM series folder to NIFTI, optionally exporting metadata.

    Parameters:
        path_to_dcm_folder: folder holding one DICOM series.
        path_to_output_folder: destination folder for NIFTI / CSV output.
        file_name: output base name (without extension).
        export_dcm: if True, dump the first DICOM file's header via
            `extract_dcm_header` (skipped when the CSV already exists,
            unless `overwrite`).
        anonymise: forwarded to `extract_dcm_header`.
        overwrite: if True, regenerate outputs that already exist.
        t2pd: if True, treat the series as an interleaved T2/PD acquisition
            and split it with `convert_t2pd_image`.

    Returns:
        DataFrame with the extracted DICOM header (empty when `export_dcm`
        is False or extraction failed).
    """
    gt.ensure_dir_exists(path_to_output_folder)
    path_to_out_file = os.path.join(path_to_output_folder, file_name)
    print("== Converting '%s'" % path_to_dcm_folder)
    # --- metadata extraction -------------------------------------------------
    dcm_df = pd.DataFrame()
    if export_dcm:
        try:
            if not os.path.exists(path_to_out_file + '.csv') or overwrite:
                print(" -- Extracting dicom metadata")
                # one series per folder: the first file's header is representative
                dcm_file = os.listdir(path_to_dcm_folder)[0]
                ds = pydicom.dcmread(os.path.join(path_to_dcm_folder, dcm_file))
                dcm_df = extract_dcm_header(ds, path_to_file=path_to_out_file,
                                            anonymise=anonymise)
        # BUG FIX: bare `except:` also swallowed SystemExit/KeyboardInterrupt
        except Exception:
            print("== Error extracting metadata for dicom folder '%s'" % (path_to_dcm_folder))
    # --- NIFTI conversion ----------------------------------------------------
    if not t2pd:
        print(" -- Converting dicom file to NIFTI")
        try:
            path_to_out_file = path_to_out_file + '.nii'
            if not os.path.exists(path_to_out_file) or overwrite:
                gt.ensure_dir_exists(os.path.dirname(path_to_out_file))
                convert_dicom_to_nii(path_to_dcm_folder, path_to_output_folder, file_name)
        except Exception:
            print("== Error converting dicom folder '%s'" % (path_to_dcm_folder))
    else:
        print(" -- Converting dicom file to NIFTI (T2wPD)")
        try:
            path_to_out_file = path_to_out_file + '.nii'
            if not os.path.exists(path_to_out_file) or overwrite:
                gt.ensure_dir_exists(os.path.dirname(path_to_out_file))
                target_path_t2 = os.path.join(path_to_output_folder, file_name + '.nii')
                # the PD echo gets its own name derived from the T2 file name
                pd_filename = "_".join(file_name.split("_")[:-1]) + "_PDfromT2wPD.nii"
                target_path_pd = os.path.join(path_to_output_folder, pd_filename)
                # escape spaces: the path ends up in a shell-style command string
                path_to_dicom_folder_escaped = path_to_dcm_folder.replace(' ', '\\ ')
                convert_t2pd_image(dicom_dir=path_to_dicom_folder_escaped,
                                   tmp_output_dir=path_to_output_folder,
                                   tmp_filename=file_name,
                                   target_path_t2=target_path_t2,
                                   target_path_pd=target_path_pd)
        except Exception:
            print("== Error converting dicom folder '%s'" % (path_to_dcm_folder))
    return dcm_df
def create_pd_from_dcm_dir(dcm_dir, out_dir=None):
    """Walk `dcm_dir` and collect one row of DICOM header metadata per folder.

    Each folder is assumed to hold a single acquisition, so only the first
    readable, non-excluded file per folder is parsed. Rows with a patient id
    are collected into a DataFrame.

    Parameters:
        dcm_dir: root directory to walk recursively.
        out_dir: if given, the DataFrame is also written there as
            'data.xls' and 'data.pkl'.

    Returns:
        DataFrame with one row per successfully parsed folder.
    """
    files_to_exclude = [
        '.raw', '.png', '.txt', '.jpg', '.jpeg', 'xls', '.nii', 'bat', 'cbf',
        'cbv', 'kep', 'ktr', 'leakage', 'r2_', 've', 'volume', 'vp', 'con_',
        'mtt', 'tif'
    ]
    # snake_case attribute names; mapped to pydicom CamelCase names below
    attributes = [
        'study_description', 'study_date', 'series_description', 'patient_id',
        'patient_name', 'series_instance_uid', 'study_instance_uid',
        'patient_sex', 'patient_age', 'slice_thickness',
        'spacing_between_slices', 'repetition_time', 'echo_time',
        'inversion_time', 'mr_acquisition_type', 'sequence_variant',
        'contrast_bolus_agent', 'protocol_name'
    ]
    records = []  # collected pd.Series; BUG FIX: replaces deprecated df.append
    lstFilesDCM = []  # create an empty list
    for dirName, subdirList, fileList in os.walk(dcm_dir):
        print("Scanning directory '%s'." % dirName)
        done = False  # set once this folder produced a row (or failed to read)
        for filename in fileList:
            print(" - processing file '%s'." % filename)
            if done:
                continue
            if any(ext in filename.lower() for ext in files_to_exclude):
                continue
            # since each folder represents 1 acquisition,
            # we read only the first dicom file in the folder to get the folder metadata
            path_to_file = os.path.join(dirName, filename)
            # BUG FIX: reset per attempt so a failed read cannot re-append the
            # previous folder's stale `series`
            series = None
            try:
                ds = pydicom.dcmread(path_to_file, force=True)
                meta_data = {}
                for attr in attributes:
                    dcm_header_name = ''.join(
                        comp.capitalize() for comp in attr.split('_'))
                    # correct issue with capitalisation of ID
                    if dcm_header_name.lower().endswith('id'):
                        dcm_header_name = dcm_header_name[:-2] + 'ID'
                    if hasattr(ds, dcm_header_name):
                        meta_data[attr] = getattr(ds, dcm_header_name)
                lstFilesDCM.append(path_to_file)
                meta_data['path_to_dir'] = dirName
                series = pd.Series(meta_data)
            except Exception:  # BUG FIX: was a bare except
                print("Could not process file '%s'" % path_to_file)
                done = True  # mirror original counter logic: stop after a failure
                continue
            if series.get('patient_id') is not None:
                records.append(series)
                done = True
            else:
                print("Did not find patient id, try again")
    df = pd.DataFrame(records)
    # save
    if out_dir:
        gt.ensure_dir_exists(out_dir)
        df.to_excel(os.path.join(out_dir, 'data.xls'))
        df.to_pickle(os.path.join(out_dir, 'data.pkl'))
    return df
def organize_files(df_seqs=None, export_dcm=True, overwrite=False, anonymise=True):
    """Sort identified DICOM series into the BIDS tree and convert to NIFTI.

    Parameters:
        df_seqs: sequence metadata table; loaded from the pickled metadata
            file when None.
        export_dcm: forwarded to `dct.convert_dcm_folder` (header CSV export).
        overwrite: forwarded to `dct.convert_dcm_folder`.
        anonymise: forwarded to `dct.convert_dcm_folder`.
    """
    data_io = dio.DataIO(config.coh_dir_bids, config.path_to_coh_bids_config)
    if df_seqs is None:
        df_seqs = pd.read_pickle(
            config.coh_path_to_metadata_sequences_pkl).reset_index()
    df_seqs.orientation = df_seqs.orientation.astype(str)
    #df_seqs['subject_id'] = df_seqs.patient_id
    sel = df_seqs
    for index, row in sel.iterrows():
        # path_to_dicom_dir
        # source_dir_subject= str(row['patient_name'])
        # source_dir_study = row['study_instance_uid']
        # source_dir_series = row['series_instance_uid']
        # path_to_dicom_dir = os.path.join(config.coh_dir_raw_data, source_dir_subject, source_dir_study, source_dir_series)
        path_to_dicom_dir = row.path_to_dir
        # NOTE(review): this tests the row's own study_date against all study
        # dates for the same UPN, so it looks like it is always true — confirm
        # the intended filtering.
        if row.study_date.to_datetime64() in df_seqs[
                df_seqs.upn == row.upn].study_date.sort_values().unique():
            subject_id = str(row.upn)
            session = dio.create_session_name(row.study_date)
            modality = str(row.sequence_name)
            orientation = row.orientation
            print("Processing subject '%s', %s, %s" % (subject_id, session, modality))
            # default target path (not created yet: create=False)
            path_to_file = data_io.create_image_path(subject=subject_id,
                                                     session=session,
                                                     modality=modality,
                                                     create=False)
            path_to_output_folder = os.path.dirname(path_to_file)
            file_name = os.path.basename(path_to_file)
            # axial structural series
            if modality in ['T1w', 'T1wPost', 'T2w', 'T2wFLAIR'
                            ] and 'AX' in orientation:
                # 3D acquisitions get a '-3D' suffixed modality and new path
                if row.dimension == '3D':
                    modality = modality + '-3D'
                    path_to_file = data_io.create_image_path(
                        subject=subject_id, session=session, modality=modality)
                    path_to_output_folder = os.path.dirname(path_to_file)
                    file_name = os.path.basename(path_to_file)
                print("# %s, %s, %s " % (path_to_dicom_dir, path_to_file,
                                         path_to_output_folder))
                gt.ensure_dir_exists(path_to_output_folder)
                dct.convert_dcm_folder(path_to_dicom_dir,
                                       path_to_output_folder,
                                       file_name,
                                       anonymise=anonymise,
                                       export_dcm=export_dcm,
                                       overwrite=overwrite)
            # interleaved T2/PD series: split into two images
            elif modality in ['T2wPD']:
                print("# %s, %s, %s " % (path_to_dicom_dir, path_to_file,
                                         path_to_output_folder))
                gt.ensure_dir_exists(path_to_output_folder)
                dct.convert_dcm_folder(path_to_dicom_dir,
                                       path_to_output_folder,
                                       file_name,
                                       anonymise=anonymise,
                                       export_dcm=export_dcm,
                                       overwrite=overwrite,
                                       t2pd=True)
            # elif modality in ['DWI', 'ADC', 'DSC', 'DCE']:
            elif modality in ['DCE']:
                print("# %s " % (file_name))
                # NOTE(review): check_for_output_folder's return value is
                # ignored, and the function does not return the alternate
                # "_2" path it computes — confirm intended behavior.
                check_for_output_folder(path_to_output_folder)
                dct.convert_dcm_folder(path_to_dicom_dir,
                                       path_to_output_folder,
                                       file_name,
                                       anonymise=anonymise,
                                       export_dcm=export_dcm,
                                       overwrite=overwrite)
            # elif modality in ['T1wMapFA']:
            #     file_name = file_name + "_" + row.FA
            #     print("# %s " % (file_name))
            #     gt.ensure_dir_exists(path_to_output_folder)
            #     dcm_tools.convert_dcm_folder(path_to_dicom_dir, path_to_output_folder, file_name,
            #                                  anonymise=False, export_dcm=EXPORT_DCM, overwrite=OVERWRITE)
            # record where the converted NIFTI is expected to live
            sel.loc[index, 'path_to_nii'] = os.path.join(path_to_output_folder,
                                                         file_name)
def generate_bratumia_input(n_per_file=10, selection='all'):
    """Scan the BIDS tree and write BraTumIA batch input CSV files.

    For each subject/session with a complete modality set (T2 or T2wPD,
    T2wFLAIR, T1w or T1w-3D, T1wPost or T1wPost-3D), resolve the four input
    NIFTI paths and record them; sessions with missing modalities or
    unresolved files are marked for review. Results are exported as Excel
    summaries plus CSV batch files of at most `n_per_file` subjects each.

    Parameters:
        n_per_file: number of subjects per generated batch CSV.
        selection: 'all' exports every ready session; any other value
            exports only sessions not yet segmented.

    Returns:
        DataFrame of sessions ready for segmentation.
    """
    data_io = dio.DataIO(config.coh_dir_bids, config.path_to_coh_bids_config)
    gt.ensure_dir_exists(config.coh_dir_bratumia)
    df = pd.DataFrame()
    count = 0
    for subject_id in data_io.bids_layout.unique('subject'):
        # for subject_id in bids_rest:
        for session in data_io.bids_layout.get(target='session',
                                               subject=subject_id,
                                               processing='original',
                                               return_type='id'):
            modalities = data_io.bids_layout.get(target='modality',
                                                 subject=subject_id,
                                                 session=session,
                                                 processing='original',
                                                 return_type='id')
            print(subject_id, session, modalities)
            # create table
            df.loc[count, 'subject_id'] = subject_id
            df.loc[count, 'session'] = session
            df.loc[count, 'modalities'] = ', '.join(modalities)
            # complete modality set required by BraTumIA
            if ((('T2w' in modalities) or ('T2wPD' in modalities))
                    and ('T2wFLAIR' in modalities)
                    and (('T1w' in modalities) or ('T1w-3D' in modalities))
                    and (('T1wPost' in modalities) or
                         ('T1wPost-3D' in modalities))):
                try:
                    # -- T1 (prefer the 3D acquisition when available)
                    if 'T1w-3D' in modalities:
                        T1_mod = 'T1w-3D'
                    elif 'T1w' in modalities:
                        T1_mod = 'T1w'
                    path_to_T1 = data_io.bids_layout.get(
                        subject=subject_id,
                        session=session,
                        modality=T1_mod,
                        processing='original',
                        extensions='nii')[0].filename
                    # -- T1c
                    if 'T1wPost-3D' in modalities:
                        T1c_mod = 'T1wPost-3D'
                    elif 'T1wPost' in modalities:
                        T1c_mod = 'T1wPost'
                    path_to_T1c = data_io.bids_layout.get(
                        subject=subject_id,
                        session=session,
                        modality=T1c_mod,
                        processing='original',
                        extensions='nii')[0].filename
                    # -- T2 (fall back to the T2 extracted from T2wPD)
                    if 'T2w' in modalities:
                        T2w_mod = 'T2w'
                    elif 'T2wPD' in modalities:
                        T2w_mod = 'T2wPD'
                    path_to_T2 = data_io.bids_layout.get(
                        subject=subject_id,
                        session=session,
                        modality=T2w_mod,
                        processing='original',
                        extensions='nii')[0].filename
                    # -- FLAIR
                    path_to_FLAIR = data_io.bids_layout.get(
                        subject=subject_id,
                        session=session,
                        modality='T2wFLAIR',
                        processing='original',
                        extensions='nii')[0].filename
                    # -- OUTFILE
                    out_path = subject_id + '_' + session
                    # -- write to file
                    df.loc[count, 'status'] = 'ready for segmentation'
                    df.loc[count, 'path_to_T1'] = path_to_T1
                    df.loc[count, 'path_to_T1c'] = path_to_T1c
                    df.loc[count, 'path_to_T2'] = path_to_T2
                    df.loc[count, 'path_to_FLAIR'] = path_to_FLAIR
                    df.loc[count, 'out_path'] = out_path
                # NOTE(review): bare except hides the actual lookup error;
                # consider `except Exception` with logging.
                except:
                    print("Problem identifying files")
                    df.loc[count, 'status'] = 'nii missing'
            else:
                print(
                    "Not all modalities available for subject '%s', session '%s'"
                    % (subject_id, session))
                df.loc[count, 'status'] = 'modality missing'
            # check if already segmented
            path_to_tumor_seg = data_io.create_registered_image_path(
                subject=subject_id,
                session=session,
                modality='tumorseg',
                segmentation='tumor',
                other='bratumia',
                processing='bratumia',
                extension='mha',
                create=False)
            if os.path.exists(path_to_tumor_seg):
                segmented = True
            else:
                segmented = False
            df.loc[count, 'segmented'] = segmented
            count = count + 1
    # full scan results
    df.to_excel(config.coh_dir_scanned_for_seg_xls)
    df.to_pickle(config.coh_dir_scanned_for_seg_pkl)
    # split into ready-to-segment vs. needs-review by presence of path_to_T1
    df_for_bratumia = df[~df.path_to_T1.isna()].reset_index()
    df_tb_reviewed = df[df.path_to_T1.isna()].reset_index()
    df_for_bratumia.set_index(['subject_id', 'session']).sort_index().to_excel(
        os.path.join(config.coh_dir_bratumia,
                     'files_ready_for_segmentation.xls'))
    df_tb_reviewed.set_index(['subject_id', 'session']).sort_index().to_excel(
        os.path.join(config.coh_dir_bratumia,
                     'files_to_be_reviewed_for_segmentation.xls'))
    # column order expected by BraTumIA's batch CSV
    bratumia_columns = [
        'path_to_T1', 'path_to_T1c', 'path_to_T2', 'path_to_FLAIR', 'out_path'
    ]
    if selection == 'all':
        df_sel = df_for_bratumia
    else:
        df_sel = df_for_bratumia[df_for_bratumia.segmented == False]
    df_sel[bratumia_columns].to_csv(os.path.join(config.coh_dir_bratumia,
                                                 'to_segment.csv'),
                                    index=False,
                                    header=False)
    # write batch files of n_per_file subjects each
    # (`chunks` is presumably a module-level helper that slices an iterable —
    # not visible in this chunk)
    for sublist in chunks(df_sel.subject_id.unique(), n_per_file):
        file_name = "batch_ids_" + "-".join(sublist) + ".csv"
        selection_pd = df_sel[df_sel.subject_id.isin(sublist)]
        selection_pd[bratumia_columns].to_csv(os.path.join(
            config.coh_dir_bratumia, file_name),
                                              index=False,
                                              header=False)
    print("Saved files to %s" % config.coh_dir_bratumia)
    return df_for_bratumia
def check_for_output_folder(path_to_folder):
    """Ensure an output folder is available and return the path to use.

    If `path_to_folder` already exists, falls back to `<path>_2` so an
    existing output is not reused; the chosen folder is created if needed.

    BUG FIX: the original reassigned the local name but never returned it
    (and never created the fallback folder), so callers could not learn
    which path was actually selected.

    Returns:
        The (possibly suffixed) folder path that exists after the call.
    """
    if os.path.exists(path_to_folder):
        path_to_folder = path_to_folder + "_2"
    gt.ensure_dir_exists(path_to_folder)
    return path_to_folder
'keyword': { 'DSC': ['perfusion', 'perf'], 'DCE': ['dce', 'dynamic'], 'ADC': ['adc', 'apparent'], 'DWI': ['dw'], 'DTI': ['dti'] }, 'FA': { 'num': 'FA[0-9]{1,2}' } } base_dir = "/Volumes/WD-EXT_1TB_MacOS_ENC/COH_CART" path_to_dicom = os.path.join(base_dir, "DOI") output_dir = os.path.join(path_to_dicom, 'dataset_info') gt.ensure_dir_exists(output_dir) df = dcm_tools.create_pd_from_dcm_dir(dcm_dir=path_to_dicom, out_dir=output_dir) # convert study date to date-time format df.study_date = pd.to_datetime(df.study_date) df = df.set_index(['patient_id', 'study_instance_uid', 'series_instance_uid']) # identify sequence information #df = pd.read_pickle(os.path.join(output_dir, 'data_with_sequences.pkl')) df_seqs = dcm_tools.identify_sequences(df, series_descr_map, output_dir) # Create Summary df
import analysis.irb_13384.coh_config as config
from tools import data_io as dio
import analysis.irb_13384.coh_helpers as ch
import tools.general_tools as gt
import os

# Script: aggregate BraTumIA segmentation label volumes per subject/session
# and export summary tables.
gt.ensure_dir_exists(config.coh_dir_analysis_segmentation)
data_io = dio.DataIO(config.coh_dir_bids, config.path_to_coh_bids_config)

# This function looks for existing segmentation files and analyzes them
# It gives preferences to files ending in '_p.mha'
df = ch.analyze_segmentations(data_io, subjects=None)

#-- compute total volume
# columns prefixed 'bratumia' hold the per-label volumes produced above
all_segmentation_labels = [
    col for col in df.columns if col.startswith('bratumia')
]
df["bratumia_total_segmented_volume"] = df[all_segmentation_labels].sum(axis=1)
# total tumor = enhancing + necrosis + non-enhancing compartments
all_tumor_labels = [
    'bratumia_EnhancingTumor', 'bratumia_Necrosis', 'bratumia_NonEnhancingTumor'
]
df["bratumia_TotalTumor"] = df[all_tumor_labels].sum(axis=1)
# non-enhancing share of the tumor (necrosis + non-enhancing)
other_tumor_labels = ['bratumia_Necrosis', 'bratumia_NonEnhancingTumor']
df["bratumia_OtherTumor"] = df[other_tumor_labels].sum(axis=1)

#-- save: one flat table, one indexed by (subject_id, session)
df.to_excel(
    os.path.join(config.coh_dir_analysis_segmentation,
                 'segmentation_stats_single_index.xls'))
df = df.set_index(['subject_id', 'session']).sort_index()
df.to_excel(config.coh_dir_output_labelstats_xls)
#coh_data_dir = os.path.join(coh_base_dir,'ORIG') coh_base_dir_in = "/Volumes/WD-EXT_1TB_MacOS_ENC/COH" coh_base_dir_in = "/Volumes/Macintosh HD-1/Users/mathoncuser/Desktop/DATA/CAR-T-CELL" coh_base_dir_in = "/Volumes/mathoncuser/Desktop/DATA/CAR-T-CELL" coh_dir_raw_data = coh_base_dir_in coh_analysis_dir = os.path.join(project_path, 'analysis', 'irb_13384') path_to_coh_bids_config = os.path.join(coh_analysis_dir, 'coh_bids_config.json') #path_to_id_map = os.path.join(project_path, 'do_not_include_in_git', 'car-t-cell_patient-list_plain.xlsx') #== output to BRAIN folder coh_dir_output_repo = os.path.join(coh_base_dir_out, 'output') gt.ensure_dir_exists(coh_dir_output_repo) coh_dir_output_datainfo = os.path.join(coh_dir_output_repo, 'datainfo') gt.ensure_dir_exists(coh_dir_output_datainfo) coh_path_to_metadata_raw_xls = os.path.join(coh_dir_output_datainfo, 'dcm_metadata.xls') coh_path_to_metadata_raw_pkl = os.path.join(coh_dir_output_datainfo, 'dcm_metadata.pkl') coh_path_to_metadata_sequences_xls = os.path.join(coh_dir_output_datainfo, 'dcm_metadata_with_sequences.xls') coh_path_to_metadata_sequences_pkl = os.path.join(coh_dir_output_datainfo, 'dcm_metadata_with_sequences.pkl') coh_path_to_metadata_sequences_selection_xls = os.path.join(coh_dir_output_datainfo, 'dcm_metadata_with_sequences_selection.xls') coh_path_to_metadata_sequences_timepoint_summary_xls = os.path.join(coh_dir_output_datainfo, 'dcm_metadata_with_sequences_timepoint_summary.xls') coh_dir_output_processed = os.path.join(coh_dir_output_repo, 'processed') coh_dir_output_for_nb = os.path.join(coh_dir_output_repo, 'for_notebook') coh_dir_output_labelstats_xls = os.path.join(coh_dir_output_repo, 'segmentation_label_stats.xls') coh_dir_output_labelstats_pkl = os.path.join(coh_dir_output_repo, 'segmentation_label_stats.pkl') coh_dir_scanned_for_seg_xls = os.path.join(coh_dir_bratumia, 'files_scanned_for_segmentation.xls')