import csv
import glob
import os
from collections import defaultdict

import numpy as np
import pydicom
from sklearn.metrics import r2_score

# Package-internal helpers assumed importable from elsewhere in this package:
# replace_suffix, convert_input_2_numpy, save_numpy_2_nifti,
# grab_files_recursive, get_uncompressed_dicom, get_dicom_pixel_array,
# sanitize_filename.


def Paired_Visits_Worksheet(input_csv, output_csv, grab_column=2, r2_thresholds=[.9]):
    """ Pair VISIT_01 and VISIT_02 rows of a statistics csv and write the
        values in grab_column side by side, one output csv per r2 threshold.
    """

    for r2 in r2_thresholds:

        # dtype=str keeps codes as text; dtype=object yields bytes in Python 3.
        input_data = np.genfromtxt(replace_suffix(input_csv, '', '_' + str(r2)), delimiter=',', dtype=str, skip_header=1)

        visit_1_list = [x for x in input_data[:, 0] if 'VISIT_01' in x]
        output_data = np.zeros((len(visit_1_list) + 1, 3), dtype=object)
        output_data[0, :] = ['method_code', 'visit_1', 'visit_2']

        with open(replace_suffix(output_csv, '', '_' + str(r2)), 'w', newline='') as writefile:
            csvfile = csv.writer(writefile, delimiter=',')
            csvfile.writerow(output_data[0, :])

            for visit_idx, visit in enumerate(visit_1_list):

                if 'r2_r2' in visit:
                    continue

                # Derive the matching VISIT_02 code from the VISIT_01 code.
                split_visit = visit.split('VISIT_01')
                new_visit = split_visit[0] + 'VISIT_02' + split_visit[1]

                if new_visit in input_data[:, 0]:
                    visit_row = np.where(input_data == visit)[0][0]
                    new_visit_row = np.where(input_data == new_visit)[0][0]

                    output_data[visit_idx + 1, 0] = visit
                    output_data[visit_idx + 1, 1] = input_data[visit_row, grab_column]
                    output_data[visit_idx + 1, 2] = input_data[new_visit_row, grab_column]

                    # Only write rows where both visits have usable (non-zero) data.
                    if output_data[visit_idx + 1, 0] not in (0, '0') and input_data[visit_row, -1] != '0' and input_data[new_visit_row, -1] != '0':
                        csvfile.writerow(output_data[visit_idx + 1, :])
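# The replace_suffix helper used throughout this file is package-internal and
# not defined here. The function below is a minimal sketch of the behavior its
# call sites assume (insert or swap a suffix in the file stem, keeping the
# extension intact), not the original implementation.
def _replace_suffix_sketch(filepath, input_suffix='', output_suffix='', suffix_delimiter=None):
    directory, basename = os.path.split(filepath)
    if '.' in basename:
        stem, extension = basename.split('.', 1)
        extension = '.' + extension
    else:
        stem, extension = basename, ''
    if suffix_delimiter is not None:
        # Replace the last delimiter-separated token of the stem, as in
        # replace_suffix(filename, None, 'r2', suffix_delimiter='_').
        stem = suffix_delimiter.join(stem.split(suffix_delimiter)[:-1] + [output_suffix])
    elif input_suffix:
        stem = stem.replace(input_suffix, output_suffix)
    else:
        stem += output_suffix
    return os.path.join(directory, stem + extension)

# Example: _replace_suffix_sketch('stats.csv', '', '_0.9') -> 'stats_0.9.csv'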
def Save_Directory_Statistics(input_directory, ROI_directory, output_csv, mask=False, mask_suffix='_mask', r2_thresholds=[.9]):
    """ Save ROI statistics for every thresholded parameter map in
        input_directory into one csv file per r2 threshold.
    """

    file_database = glob.glob(os.path.join(input_directory, '*blur*r2_' + str(r2_thresholds[0]) + '.nii*'))
    output_headers = ['filename', 'mean', 'median', 'min', 'max', 'std', 'total_voxels', 'removed_values', 'removed_percent', 'low_values', 'low_percent']

    # Cache ROI volumes keyed by the 15-character patient-visit code.
    ROI_dict = {}
    for ROI in glob.glob(os.path.join(ROI_directory, '*.nii*')):
        ROI_dict[os.path.basename(os.path.normpath(ROI))[0:15]] = convert_input_2_numpy(ROI)

    for r2 in r2_thresholds:

        output_data = np.zeros((1 + len(file_database), len(output_headers)), dtype=object)
        output_data[0, :] = output_headers

        with open(replace_suffix(output_csv, '', '_' + str(r2)), 'w', newline='') as writefile:
            csvfile = csv.writer(writefile, delimiter=',')
            csvfile.writerow(output_data[0, :])

            for row_idx, filename in enumerate(file_database):

                data_array = convert_input_2_numpy(filename)
                patient_visit_code = os.path.basename(os.path.normpath(filename))[0:15]
                roi_array = ROI_dict[patient_visit_code]

                # The matching r2 map shares the filename, with the parameter
                # token swapped out for 'r2'.
                r2_filename = filename.split('_')
                r2_filename[-3] = 'r2'
                r2_filename = '_'.join(r2_filename)
                r2_array = convert_input_2_numpy(r2_filename)

                # Flag negative, low-r2, and out-of-ROI voxels with a sentinel.
                data_array[data_array < 0] = -.01
                data_array[r2_array <= r2] = -.01
                data_array[roi_array <= 0] = -.01
                masked_data_array_ROI = np.ma.masked_where(data_array < 0, data_array)

                ROI_values = [np.ma.mean(masked_data_array_ROI),
                              np.ma.median(masked_data_array_ROI),
                              np.ma.min(masked_data_array_ROI),
                              np.ma.max(masked_data_array_ROI),
                              np.ma.std(masked_data_array_ROI),
                              (roi_array > 0).sum(),
                              ((data_array <= 0) & (roi_array > 0)).sum(),
                              float(((data_array <= 0) & (roi_array > 0)).sum()) / float((roi_array > 0).sum()),
                              ((r2_array >= r2) & (roi_array > 0)).sum(),
                              float(((r2_array >= r2) & (roi_array > 0)).sum()) / float((roi_array > 0).sum())]
                print(ROI_values)

                output_data[row_idx + 1] = [filename] + ROI_values
                csvfile.writerow(output_data[row_idx + 1])

    return
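# convert_input_2_numpy and save_numpy_2_nifti are also package-internal. The
# pair below is a minimal nibabel-based sketch of the behavior their call
# sites assume, not the original implementations; save_numpy_2_nifti takes
# either a reference NIfTI path (as in Preprocess_Volumes) or a 4x4 affine
# (as in dcm_2_numpy) as its second argument.
import nibabel as nib

def _convert_input_2_numpy_sketch(input_filepath):
    """Load a NIfTI file and return its voxel data as a numpy array."""
    return np.asarray(nib.load(input_filepath).get_fdata())

def _save_numpy_2_nifti_sketch(image_numpy, reference_or_affine, output_filepath):
    """Save an array as NIfTI, copying the affine from a reference file if one is given."""
    if isinstance(reference_or_affine, str):
        affine = nib.load(reference_or_affine).affine
    else:
        affine = np.asarray(reference_or_affine)
    nib.save(nib.Nifti1Image(image_numpy, affine), output_filepath)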
def Determine_R2_Cutoff_Point(input_directory, ROI_directory):
    """ Sweep r2 thresholds from 0 to 1 in steps of .01 and print the
        cumulative fraction of ROI voxels that would be masked at each
        threshold.
    """

    file_database = glob.glob(os.path.join(input_directory, '*.nii*'))

    # Cache ROI volumes keyed by the 15-character patient-visit code.
    ROI_dict = {}
    for ROI in glob.glob(os.path.join(ROI_directory, '*.nii*')):
        ROI_dict[os.path.basename(os.path.normpath(ROI))[0:15]] = convert_input_2_numpy(ROI)

    r2_masked_num, r2_total_num = [0] * 100, [0] * 100

    np.set_printoptions(precision=2)
    np.set_printoptions(suppress=True)

    for row_idx, filename in enumerate(file_database):

        if 'ktrans' not in filename or '0.2' in filename:
            continue

        r2_array = convert_input_2_numpy(replace_suffix(filename, input_suffix=None, output_suffix='r2', suffix_delimiter='_'))
        patient_visit_code = os.path.basename(os.path.normpath(filename))[0:15]
        roi_array = ROI_dict[patient_visit_code]

        # Accumulate masked / total ROI voxel counts at each threshold.
        for r2_idx, r2_threshold in enumerate(np.arange(0, 1, .01)):
            r2_masked_num[r2_idx] += ((r2_array <= r2_threshold) & (roi_array > 0)).sum()
            r2_total_num[r2_idx] += (roi_array > 0).sum()

        print(np.array(r2_masked_num, dtype=float) / np.array(r2_total_num, dtype=float))

    r2_percent_num = np.array(r2_masked_num, dtype=float) / np.array(r2_total_num, dtype=float)

    # np.arange, not range: range() does not accept a float step.
    for r2_idx, r2_threshold in enumerate(np.arange(0, 1, .01)):
        print(r2_threshold)
        print(r2_percent_num[r2_idx])

    return
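# Toy illustration of the sweep above on synthetic arrays: for each candidate
# threshold, the fraction of ROI voxels whose r2 value falls at or below it.
def _r2_sweep_demo():
    r2_map = np.random.rand(10, 10)
    roi = np.ones((10, 10))
    thresholds = np.arange(0, 1, .01)
    fractions = [((r2_map <= t) & (roi > 0)).sum() / float((roi > 0).sum())
                 for t in thresholds]
    return thresholds, fractions  # fractions[90] ~ voxels masked at r2 <= 0.9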
def Convert_NordicIce_AIF(AIF_directory, output_suffix='_AIF'):
    """ Rewrite per-visit AIF text files as single-row, semicolon-delimited
        files, saved alongside the originals with output_suffix appended.
    """

    AIF_list = glob.glob(os.path.join(AIF_directory, '*VISIT*.txt'))
    AIF_numpy_list = [[np.loadtxt(AIF, dtype=float), AIF] for AIF in AIF_list]

    for AIF in AIF_numpy_list:
        print(AIF[1])
        print(AIF[0].shape)
        # [None] adds a leading axis so the AIF is written as a single row.
        np.savetxt(replace_suffix(AIF[1], '', output_suffix), AIF[0][None], fmt='%2.5f', delimiter=';')
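# np.savetxt writes a 1-D array one value per line; indexing with [None] adds
# a leading axis, shape (N,) -> (1, N), so the AIF comes out as the single
# semicolon-separated row produced above. A toy illustration (hypothetical
# output path):
def _aif_row_demo(output_path='example_AIF.txt'):
    aif = np.array([0.1, 0.5, 1.2])
    np.savetxt(output_path, aif[None], fmt='%2.5f', delimiter=';')
    # File contents: 0.10000;0.50000;1.20000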
def Preprocess_Volumes(input_directory, output_directory, r2_threshold=.9):
    """ Threshold ktrans and ve maps by goodness-of-fit (r2), derive kep, and
        save the thresholded maps into output_directory.
    """

    if not os.path.exists(output_directory):
        os.mkdir(output_directory)

    file_database = glob.glob(os.path.join(input_directory, '*r2*.nii*'))
    print(os.path.join(input_directory, '*r2*.nii*'))

    for filepath in file_database:
        print(filepath)

        input_ktrans = replace_suffix(filepath, 'r2', 'ktrans')
        input_ve = replace_suffix(filepath, 'r2', 've')

        output_ktrans = os.path.join(output_directory, replace_suffix(os.path.basename(filepath), 'r2', 'ktrans_r2_' + str(r2_threshold)))
        output_ve = os.path.join(output_directory, replace_suffix(os.path.basename(filepath), 'r2', 've_r2_' + str(r2_threshold)))
        output_kep = os.path.join(output_directory, replace_suffix(os.path.basename(filepath), 'r2', 'kep_r2_' + str(r2_threshold)))
        output_r2 = os.path.join(output_directory, replace_suffix(os.path.basename(filepath), 'r2', 'r2_r2_' + str(r2_threshold)))
        print(input_ktrans)

        r2_map = np.nan_to_num(convert_input_2_numpy(filepath))
        ktrans_map = convert_input_2_numpy(input_ktrans)
        ve_map = convert_input_2_numpy(input_ve)
        print((r2_map < r2_threshold).sum())

        # Zero physically implausible values. Order matters: each map masks
        # its partner before being zeroed itself.
        ve_map[ktrans_map > 10] = 0
        ktrans_map[ktrans_map > 10] = 0
        ktrans_map[ve_map > 1] = 0
        ve_map[ve_map > 1] = 0

        # Flag low goodness-of-fit voxels with a negative sentinel value.
        ktrans_map[r2_map < r2_threshold] = -.01
        ve_map[r2_map < r2_threshold] = -.01

        with np.errstate(divide='ignore', invalid='ignore'):
            kep_map = np.nan_to_num(ktrans_map / ve_map)
        kep_map[r2_map < r2_threshold] = -.01

        save_numpy_2_nifti(ktrans_map, input_ktrans, output_ktrans)
        save_numpy_2_nifti(ve_map, input_ktrans, output_ve)
        save_numpy_2_nifti(kep_map, input_ktrans, output_kep)
        save_numpy_2_nifti(r2_map, input_ktrans, output_r2)
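# kep is the voxelwise ratio ktrans / ve; a toy illustration of the guarded
# division used above. np.nan_to_num maps the 0/0 NaNs to 0 and the x/0
# infinities to large finite values, and the r2 sentinel mask is applied
# afterwards.
def _kep_demo():
    ktrans = np.array([0.1, 0.2, 0.0])
    ve = np.array([0.5, 0.0, 0.0])
    with np.errstate(divide='ignore', invalid='ignore'):
        kep = np.nan_to_num(ktrans / ve)
    return kep  # [0.2, <large finite value>, 0.0]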
def Coeffecient_of_Variation_Worksheet(input_csv, output_csv, r2_thresholds=[.9]):
    """ Compute repeatability statistics (coefficients of variation, CCC, R2,
        limits of agreement, repeatability coefficient) per method from a
        paired-visits csv, writing one output csv per r2 threshold.
    """

    for r2 in r2_thresholds:

        # dtype=str keeps codes as text; dtype=object yields bytes in Python 3.
        input_data = np.genfromtxt(replace_suffix(input_csv, '', '_' + str(r2)), delimiter=',', dtype=str, skip_header=1)

        headers = ['method', 'RMS_COV', 'LOG_COV', 'SD_COV', 'CCC', 'R2', 'LOA_pos', 'LOA_neg', 'RC', 'mean_all_vals', 'n_measurements']
        output_data = np.zeros((3000, len(headers)), dtype=object)
        output_data[0, :] = headers
        methods, finished_methods = [], []

        # Collect all method codes (everything after the 15-character
        # patient-visit prefix of the filename).
        for row in input_data:
            if row[0] == '0' or '--' in row:
                continue
            methods += [row[0].split('/')[-1][15:]]

        # Map each method to the set of patients with unmasked paired values.
        method_dict = defaultdict(set)
        for method in methods:
            patient_list = [method == x.split('/')[-1][15:] for x in input_data[:, 0]]
            patient_list = input_data[patient_list, :]
            not_masked = [(x[1] != '--' and x[2] != '--') for x in patient_list]
            not_masked_patient_list = patient_list[not_masked, :]
            for row in not_masked_patient_list:
                method_dict[method].add(row[0].split('/')[-1][0:15])

        # Restrict to the smallest patient set among methods with at least
        # five patients, so methods are compared on a common cohort.
        available_patients = []
        for key, value in method_dict.items():
            print(key)
            if len(value) < 5:
                continue
            if available_patients == []:
                available_patients = value
            if len(value) < len(available_patients):
                available_patients = value

        print(available_patients)
        print(len(available_patients))

        new_input_data = np.zeros((1, 3), dtype=object)
        for row_idx, row in enumerate(input_data):
            patient = row[0].split('/')[-1][0:15]
            if patient in available_patients:
                new_input_data = np.vstack((new_input_data, row))
        input_data = new_input_data[1:, :]

        with open(replace_suffix(output_csv, '', '_' + str(r2)), 'w', newline='') as writefile:
            csvfile = csv.writer(writefile, delimiter=',')
            csvfile.writerow(output_data[0, :])

            row_idx = 0
            for row in input_data:

                if row[0] == '0' or '--' in row or row[0] == 0:
                    continue
                patient = row[0].split('/')[-1][0:15]
                if patient not in available_patients:
                    continue
                method = row[0].split('/')[-1][15:]
                if 't1map' in method:
                    continue

                aif_method = method.split('_')
                aif_method[1] = 'sameAIF21'
                aif_method = '_'.join(aif_method)
                # TODO: this sameAIF21 comparison is currently a no-op; the
                # continue only advances this inner loop.
                for row2 in input_data:
                    if aif_method in row2:
                        continue

                if method not in finished_methods:

                    patient_list = [method == x.split('/')[-1][15:] for x in input_data[:, 0]]
                    patient_list = input_data[patient_list, :]
                    not_masked = [(x[1] != 'nan' and x[2] != 'nan') for x in patient_list]
                    not_masked_patient_list = patient_list[not_masked, :]
                    x, y = not_masked_patient_list[:, 1].astype(float), not_masked_patient_list[:, 2].astype(float)

                    if not_masked_patient_list.shape[0] < 10:
                        continue

                    # Concordance correlation coefficient (CCC).
                    mean_x, mean_y = np.mean(x), np.mean(y)
                    std_x, std_y = np.std(x), np.std(y)
                    correl = np.ma.corrcoef(x, y)[0, 1]
                    CCC = (2 * correl * std_x * std_y) / (np.ma.var(x) + np.ma.var(y) + np.square(mean_x - mean_y))

                    # Mean over all paired values.
                    mean_all_vals = np.mean(not_masked_patient_list[:, 1:].astype(float))

                    # R2
                    R2_score = r2_score(y, x)

                    # Limits of agreement (LOA).
                    differences = x - y
                    mean_diff = np.mean(differences)
                    std_diff = np.std(differences)
                    LOA_neg, LOA_pos = mean_diff - 2 * std_diff, mean_diff + 2 * std_diff

                    # Coefficients of variation and repeatability coefficient.
                    RMS_sum, LOG_sum, SD_sum_1, SD_sum_2, n = 0, 0, 0, 0, 0
                    for patient in not_masked_patient_list:
                        data_points = [float(d) for d in patient[1:]]
                        # Skip pairs containing zeros, which break the log
                        # and ratio formulas below.
                        if any(d == 0 for d in data_points):
                            continue
                        RMS_sum += np.power(abs(data_points[0] - data_points[1]) / np.mean(data_points), 2)
                        LOG_sum += np.power(np.log(data_points[0]) - np.log(data_points[1]), 2)
                        SD_sum_1 += np.power(data_points[0] - data_points[1], 2)
                        SD_sum_2 += np.sum(data_points)
                        n += 1

                    if n == 0:
                        continue

                    RMS_COV = 100 * np.sqrt(RMS_sum / (2 * n))
                    # Within-subject CV via the log method: 100 * (exp(sw) - 1).
                    LOG_COV = 100 * (np.exp(np.sqrt(LOG_sum / (2 * n))) - 1)
                    SD_COV = 100 * np.sqrt(SD_sum_1 / (2 * n)) / (SD_sum_2 / (2 * n))
                    RC = (SD_sum_1 / n) * 1.96

                    output_data[row_idx + 1, :] = [method, RMS_COV, LOG_COV, SD_COV, CCC, R2_score, LOA_pos, LOA_neg, RC, mean_all_vals, n]
                    finished_methods += [method]

                    if output_data[row_idx + 1, 0] not in (0, '0'):
                        csvfile.writerow(output_data[row_idx + 1, :])
                        row_idx += 1

                else:
                    continue

    return
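# Reference sketch of the paired-visit statistics computed in the worksheet
# above, vectorized with numpy. Same formulas on synthetic paired measurements
# x and y; assumes strictly positive inputs (the loop above additionally
# skips pairs containing zeros).
def _paired_stats_sketch(x, y):
    """x, y: paired visit-1/visit-2 measurements as 1-D float arrays."""
    n = len(x)
    diff = x - y
    rms_cov = 100 * np.sqrt(np.sum((np.abs(diff) / ((x + y) / 2.0)) ** 2) / (2 * n))
    log_cov = 100 * (np.exp(np.sqrt(np.sum((np.log(x) - np.log(y)) ** 2) / (2 * n))) - 1)
    sd_cov = 100 * np.sqrt(np.sum(diff ** 2) / (2 * n)) / (np.sum(x + y) / (2 * n))
    ccc = (2 * np.corrcoef(x, y)[0, 1] * np.std(x) * np.std(y)) / (
        np.var(x) + np.var(y) + (np.mean(x) - np.mean(y)) ** 2)
    loa_neg, loa_pos = np.mean(diff) - 2 * np.std(diff), np.mean(diff) + 2 * np.std(diff)
    return rms_cov, log_cov, sd_cov, ccc, (loa_neg, loa_pos)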
def dcm_2_numpy(input_folder, output_folder, naming_tags=('SeriesDescription',), prefix='', suffix='', folder_tags=None, folder_mode='combine', harden_orientation=True, verbose=False):
    """ Uses pydicom to stack DICOM files into volumes, grouped by
        SeriesInstanceUID and sorted by InstanceNumber, and saves each volume
        as a NIfTI file. Returns the list of saved filenames.

        Note: output_folder, naming_tags, prefix, suffix, folder_tags,
        folder_mode, and harden_orientation were referenced in the body but
        missing from the original signature; the defaults here are
        assumptions.

        TODO: Make it take slice_order into account.
    """

    if verbose:
        print('Searching for dicom files...')

    found_files = grab_files_recursive(input_folder)

    if verbose:
        print('Found', len(found_files), 'in directory. \n')
        print('Checking DICOM compatability...')

    dicom_files = []
    for file in found_files:
        try:
            temp_dicom = pydicom.dcmread(file)
            dicom_files += [[file, temp_dicom.data_element('SeriesInstanceUID').value]]
        except Exception:
            continue

    if verbose:
        print('Found', len(dicom_files), 'DICOM files in directory. \n')
        print('Counting volumes..')

    # Group DICOM filepaths by SeriesInstanceUID.
    unique_dicoms = defaultdict(list)
    for dicom_file in dicom_files:
        UID = dicom_file[1]
        unique_dicoms[UID] += [dicom_file[0]]

    if verbose:
        print('Found', len(list(unique_dicoms.keys())), 'unique volumes \n')
        print('Saving out files from these volumes.')

    output_filenames = []
    for UID in list(unique_dicoms.keys()):

        try:
            # Grab DICOMs for a certain Instance
            current_files = unique_dicoms[UID]
            current_dicoms = [get_uncompressed_dicom(dcm) for dcm in unique_dicoms[UID]]

            # Sort DICOMs by InstanceNumber. Sorting on the key alone keeps
            # pydicom Datasets out of the comparison when instances tie.
            dicom_instances = [x.data_element('InstanceNumber').value for x in current_dicoms]
            current_dicoms = [x for _, x in sorted(zip(dicom_instances, current_dicoms), key=lambda pair: pair[0])]
            current_files = [x for _, x in sorted(zip(dicom_instances, current_files), key=lambda pair: pair[0])]
            first_dicom, last_dicom = current_dicoms[0], current_dicoms[-1]

            # Create a filename for the DICOM
            volume_label = '_'.join([first_dicom.data_element(tag).value for tag in naming_tags]).replace(" ", "")
            volume_label = prefix + sanitize_filename(volume_label) + suffix + '.nii.gz'

            if verbose:
                print('Saving...', volume_label)

        except Exception:
            print('Could not read DICOM volume SeriesDescription. Skipping UID...', str(UID))
            continue

        try:
            # Extract patient position information for affine creation.
            output_affine = np.eye(4)
            image_position_patient = np.array(first_dicom.data_element('ImagePositionPatient').value).astype(float)
            image_orientation_patient = np.array(first_dicom.data_element('ImageOrientationPatient').value).astype(float)
            last_image_position_patient = np.array(last_dicom.data_element('ImagePositionPatient').value).astype(float)
            pixel_spacing_patient = np.array(first_dicom.data_element('PixelSpacing').value).astype(float)

            # Create DICOM Space affine (don't fully understand, TODO)
            output_affine[0:3, 0] = pixel_spacing_patient[0] * image_orientation_patient[0:3]
            output_affine[0:3, 1] = pixel_spacing_patient[1] * image_orientation_patient[3:6]
            output_affine[0:3, 2] = (image_position_patient - last_image_position_patient) / (1 - len(current_dicoms))
            output_affine[0:3, 3] = image_position_patient

            # Transformations from DICOM to Nifti Space (don't fully understand, TODO)
            cr_flip = np.eye(4)
            cr_flip[0:2, 0:2] = [[0, 1], [1, 0]]
            neg_flip = np.eye(4)
            neg_flip[0:2, 0:2] = [[-1, 0], [0, -1]]
            output_affine = np.matmul(neg_flip, np.matmul(output_affine, cr_flip))

            # Create numpy array data...
            output_numpy = []
            for i in range(len(current_dicoms)):
                try:
                    output_numpy += [get_dicom_pixel_array(current_dicoms[i], current_files[i])]
                except Exception:
                    print('Warning, error at slice', i)
            output_numpy = np.stack(output_numpy, -1)

            # If preferred, harden to identity matrix space (LPS, maybe?)
            # Also unsure of the dynamic here, but they work.
            if harden_orientation:

                cx, cy, cz = np.argmax(np.abs(output_affine[0:3, 0:3]), axis=0)
                output_numpy = np.transpose(output_numpy, (cx, cy, cz))

                harden_matrix = np.eye(4)
                for dim, i in enumerate([cx, cy, cz]):
                    harden_matrix[i, i] = 0
                    harden_matrix[dim, i] = 1
                output_affine = np.matmul(output_affine, harden_matrix)

                flip_matrix = np.eye(4)
                for i in range(3):
                    if output_affine[i, i] < 0:
                        flip_matrix[i, i] = -1
                        output_numpy = np.flip(output_numpy, i)
                output_affine = np.matmul(output_affine, flip_matrix)

            # Create output folder according to tags.
            specific_folder = output_folder
            for tag in (folder_tags or []):
                if specific_folder == output_folder or folder_mode == 'recursive':
                    specific_folder = os.path.join(specific_folder, sanitize_filename(first_dicom.data_element(tag).value))
                elif folder_mode == 'combine':
                    specific_folder = specific_folder + '_' + sanitize_filename(first_dicom.data_element(tag).value)
            if not os.path.exists(specific_folder):
                os.makedirs(specific_folder)

            # Save out file, avoiding name collisions between volumes.
            output_filename = os.path.join(specific_folder, volume_label)
            if os.path.exists(output_filename) and output_filename in output_filenames:
                output_filename = replace_suffix(output_filename, '', '_copy')
            save_numpy_2_nifti(output_numpy, output_affine, output_filename)
            output_filenames += [output_filename]

        except Exception:
            print('Could not read DICOM at SeriesDescription...', volume_label)

    return output_filenames
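# Example invocation of dcm_2_numpy; the paths are hypothetical, and the
# keyword parameters are the ones restored to the signature above.
# dcm_2_numpy('/data/dicom_exports', '/data/nifti_volumes',
#             naming_tags=('PatientID', 'SeriesDescription'),
#             folder_tags=('PatientID',), folder_mode='combine',
#             harden_orientation=True, verbose=True)

# Toy illustration of the keyed instance-number sort used above: sorting on
# the key alone keeps non-orderable items out of the comparison.
def _instance_sort_demo():
    instance_numbers = [3, 1, 2]
    filenames = ['c.dcm', 'a.dcm', 'b.dcm']
    # Result: ['a.dcm', 'b.dcm', 'c.dcm']
    return [f for _, f in sorted(zip(instance_numbers, filenames), key=lambda pair: pair[0])]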