def compare_readings(reading_gt_file, reading_pred_file):
    """Print how often a predicted reading agrees with a ground-truth reading.

    Both files are parsed with ``parse.parse`` using the map dict loaded from
    the hard-coded reference CSV. Entries are matched on the stripped
    ``patient_id`` concatenated with the stripped ``str`` field, and their
    ``series_label`` values are compared.

    Every ground-truth entry counts towards the total. A ground-truth entry
    with no matching prediction is reported and counted as incorrect instead
    of aborting the comparison with a ``KeyError``.

    :param reading_gt_file: path of the ground-truth reading file
    :param reading_pred_file: path of the predicted reading file
    :return: None; the summary is printed
    """
    map_dict = load_map_dict(
        '/media/zzhu/Seagate Backup Plus Drive/data/Amber/LIRADSMachineLearnin_DATA_mustafa.csv',
        3, 29)
    reading_gt = parse.parse(reading_gt_file, map_dict)
    reading_pred = parse.parse(reading_pred_file, map_dict)

    print('Comparing {0} and {1}'.format(os.path.basename(reading_gt_file),
                                         os.path.basename(reading_pred_file)))

    def index_by_series(reading):
        # Key each entry by patient id + series string; later duplicates win.
        return {
            read['patient_id'].strip() + read['str'].strip(): read
            for read in reading
        }

    reading_map_gt = index_by_series(reading_gt)
    reading_map_pred = index_by_series(reading_pred)

    count = 0
    correct = 0
    for key, gt_read in reading_map_gt.items():
        count += 1
        pred_read = reading_map_pred.get(key)
        if pred_read is None:
            # No prediction for this series: report it and score it as wrong.
            print('Missing prediction for:{0}'.format(key))
            continue
        if gt_read['series_label'] == pred_read['series_label']:
            correct += 1

    # Avoid ZeroDivisionError when the ground truth is empty.
    accuracy = float(correct) / float(count) if count else float('nan')
    print('Total:{0} Correct:{1} Acc:{2}'.format(count, correct, accuracy))
def load_reading(reading_file):
    """Load a reader CSV and return a ``{series_id: series_label}`` dict.

    The series-name-to-label table is read from the hard-coded map file,
    extended with a few alias names, and then extended again so that every
    header column in the range 5..42 of ``reading_file`` that is not already
    known maps to the ``'anythingelse'`` label. The resulting table is handed
    to ``parse_reader_csv.parse`` together with ``reading_file``.

    :param reading_file: path of the reader CSV file
    :return: dict mapping each parsed ``series_id`` to its ``series_label``
    """
    label_map_path = '/home/zzhu/Data/data/ai_vs_radiologist/map_fatonly.txt'
    record_dtype = {'names': ('series_name', 'label'), 'formats': ('S20', 'i2')}
    records = np.loadtxt(label_map_path, dtype=record_dtype)

    # Series names are loaded as bytes ('S20'); decode them to text keys.
    name_label_dict = {
        record[0].decode("utf-8"): record[1] for record in records
    }

    # Alternative spellings that share the label of an existing name.
    aliases = (
        ('dwi_t2', 'dwi_and_t2'),
        ('t2', 'dwi_and_t2'),
        ('hepatocyte', 'hepa_trans'),
        ('transitional', 'hepa_trans'),
    )
    for alias, canonical in aliases:
        name_label_dict[alias] = name_label_dict[canonical]

    first_col, stop_col = 5, 43
    # Any header column not known so far falls into the 'anythingelse' class.
    with open(reading_file, 'r') as csv_file:
        header = next(csv.reader(csv_file, delimiter=','), None)
        for col in range(first_col, stop_col):
            column_name = header[col]
            if column_name not in name_label_dict:
                name_label_dict[column_name] = name_label_dict['anythingelse']

    # Parse the reader file and keep only the id -> label association.
    parsed_series = parse_reader_csv.parse(reading_file, name_label_dict)
    return {
        entry['series_id']: entry['series_label'] for entry in parsed_series
    }
def compare_two_readings(reading1_file, reading2_file, map_dict):
    """Print a consistency report for the series ids of two reading files.

    Each file is parsed with ``parse.parse``. The report lists repeated
    ``series_id`` entries within each reading, the number of distinct ids per
    reading, and the ids that appear in one reading but not in the other.

    :param reading1_file: path of the first reading file
    :param reading2_file: path of the second reading file
    :param map_dict: map dict passed through to ``parse.parse``
    :return: None; the report is printed
    """
    print('Comparing {0} and {1}'.format(reading1_file, reading2_file))
    parsed = (parse.parse(reading1_file, map_dict),
              parse.parse(reading2_file, map_dict))

    # Collect the distinct series ids of each reading, flagging repeats.
    id_sets = []
    for entries in parsed:
        seen = set()
        for entry in entries:
            series_id = entry['series_id']
            if series_id in seen:
                print('Repeated item:{0}'.format(entry))
            seen.add(series_id)
        id_sets.append(seen)

    # Check repeated items first
    warnings = ('reading 1 may contain repeated items',
                'reading 2 may contain repeated items')
    for entries, seen, warning in zip(parsed, id_sets, warnings):
        if len(entries) != len(seen):
            print(warning)
            print('from list:{0}'.format(len(entries)))

    names = (os.path.basename(reading1_file), os.path.basename(reading2_file))
    for name, seen in zip(names, id_sets):
        print('{0} reading contains {1} items'.format(name, len(seen)))

    # Report ids exclusive to either side, first reading first.
    ids1, ids2 = id_sets
    name1, name2 = names
    for present_in, absent_from, only_here in ((name1, name2, ids1 - ids2),
                                               (name2, name1, ids2 - ids1)):
        if only_here:
            print('In {0} but not in {1}:'.format(present_in, absent_from))
            for item in only_here:
                print(item)
def test_6():
    """Build the external-validation TFRecord; each instance is a 3D volume.

    For every series folder under every patient folder, the image files are
    read as grayscale slices, stacked along the last axis into a ``uint8``
    volume, and written as one ``tf.train.Example`` together with the label
    and identifiers looked up in the reference reading.

    Series folders that contain no files are reported and skipped. A series
    folder that is not present in the reference reading raises ``KeyError``.

    :return: None; the TFRecord file is written and progress is printed
    """
    reference_file = '/media/zzhu/Seagate Backup Plus Drive/data/Amber/LIRADSMachineLearnin_DATA_mustafa.csv'
    patient_folder = '/media/zzhu/Seagate Backup Plus Drive/data/Amber/png_series'
    tfrecord_file = '/media/zzhu/Seagate Backup Plus Drive/data/Amber/tfrecord/external_validation.tfrecord'

    map_dict = load_map_dict(reference_file, 3, 29)
    reading = parse.parse(reference_file, map_dict)

    # Index the reference reading by patient id + series string.
    reading_map = {}
    for read in reading:
        reading_map[read['patient_id'] + read['str'].strip()] = read

    patient_folder_list = glob.glob(patient_folder + '/*')
    series_num = 0
    with tf.python_io.TFRecordWriter(tfrecord_file) as writer:
        for patient_dir in patient_folder_list:
            # The last three characters of the folder name are the lookup id.
            folder_patient_id = os.path.basename(patient_dir)[-3:]
            for series_folder in glob.glob(patient_dir + '/*'):
                folder_name = os.path.basename(series_folder).strip()
                read = reading_map[folder_patient_id + folder_name]
                series_str = read['str']
                # Kept separate so the folder-derived lookup id is not
                # overwritten for the remaining series of this patient.
                record_patient_id = read['patient_id']
                series_label = read['series_label']

                # glob returns files in arbitrary order; sort so slices are
                # stacked in a deterministic (file name) order.
                slice_file_list = sorted(glob.glob(series_folder + '/*'))
                if not slice_file_list:
                    # Nothing to stack: report the folder and move on instead
                    # of failing on slice_file_list[0].
                    print(series_folder)
                    continue

                series_num += 1
                if series_num % 50 == 0:
                    print(series_num)

                # The first slice fixes the in-plane size of the volume.
                anchor_slice = cv2.imread(slice_file_list[0], 0)
                slice_height = anchor_slice.shape[0]
                slice_width = anchor_slice.shape[1]
                slice_depth = len(slice_file_list)
                volume = np.zeros((slice_height, slice_width, slice_depth),
                                  dtype=np.uint8)

                for slice_idx, slice_file in enumerate(slice_file_list):
                    slice_img = cv2.imread(slice_file, 0)
                    # Resize on any in-plane mismatch (height or width), not
                    # just height, so the assignment below always fits.
                    if slice_img.shape[:2] != (slice_height, slice_width):
                        print(slice_file_list[0])
                        # cv2.resize takes the target size as (width, height).
                        slice_img = cv2.resize(slice_img,
                                               (slice_width, slice_height))
                    volume[:, :, slice_idx] = slice_img

                # tobytes() replaces the deprecated tostring(); same bytes.
                volume_raw = volume.tobytes()
                example = tf.train.Example(features=tf.train.Features(
                    feature={
                        'height': _int64_feature(slice_height),
                        'width': _int64_feature(slice_width),
                        'slice': _int64_feature(slice_depth),
                        'label': _int64_feature(series_label),
                        'series_str': _bytes_feature(series_str),
                        'patient_id': _bytes_feature(record_patient_id),
                        'volume_raw': _bytes_feature(volume_raw)
                    }))
                writer.write(example.SerializeToString())
    print('In total {0} series'.format(series_num))
def separate_series(input_folder, output_folder):
    """Copy PNG slices into one output folder per series of the reference reading.

    The reference reading (Mustafa's file) is parsed, and for each entry a
    folder ``<output_folder>/LRML_XXXX/<series string>`` is created. Which
    files are copied into it depends on the entry's ``type``:

    * ``"number"``: every ``*.png`` in the source folder of the same name.
    * ``"slice"``: files ``IM-0001-NNNN-0001.png`` for ``slice_begin`` through
      ``slice_end`` inclusive, taken from the source folder named by the text
      before ``'('``.
    * ``"odd"`` / ``"even"``: the ``*.png`` files of that same source folder
      whose character at position ``-10`` of the path is an odd / even digit.

    :param input_folder: folder holding the source ``LRML_XXXX`` folders
    :param output_folder: folder receiving the separated series
    :return: None
    """
    # use mustafa's file as reference
    map_dict = load_map_dict(
        '/media/zzhu/Seagate Backup Plus Drive/data/Amber/LIRADSMachineLearnin_DATA_mustafa.csv',
        3, 29)
    reading = parse.parse(
        '/media/zzhu/Seagate Backup Plus Drive/data/Amber/LIRADSMachineLearnin_DATA_mustafa.csv',
        map_dict)

    for processed, info in enumerate(reading, 1):
        if processed % 25 == 0:
            print(processed)

        patient_dir_name = 'LRML_{:04d}'.format(int(info['patient_id']))
        patient_src = os.path.join(input_folder, patient_dir_name)
        patient_dst = os.path.join(output_folder, patient_dir_name)
        if not os.path.exists(patient_dst):
            os.makedirs(patient_dst)

        series_name = info['str'].strip()
        series_dst = os.path.join(patient_dst, series_name)
        if not os.path.exists(series_dst):
            os.makedirs(series_dst)

        scan_type = info['type']
        if scan_type == "number":
            # Directly copy
            series_src = os.path.join(patient_src, series_name)
            for png_path in glob.glob(series_src + '/*.png'):
                copyfile(png_path,
                         os.path.join(series_dst, os.path.basename(png_path)))
        elif scan_type == "slice":
            # The source folder is named by the text before '('.
            base_name = series_name[:series_name.find('(')].strip()
            series_src = os.path.join(patient_src, base_name)
            first = int(info['slice_begin'])
            last = int(info['slice_end'])
            for idx in range(first, last + 1):
                file_name = 'IM-0001-{:04d}-0001.png'.format(idx)
                copyfile(os.path.join(series_src, file_name),
                         os.path.join(series_dst, file_name))
        elif scan_type == "odd" or scan_type == "even":
            wanted_remainder = 1 if scan_type == "odd" else 0
            base_name = series_name[:series_name.find('(')].strip()
            series_src = os.path.join(patient_src, base_name)
            for png_path in glob.glob(series_src + '/*.png'):
                # Character -10 of the path is the digit tested for parity.
                if int(png_path[-10]) % 2 == wanted_remainder:
                    copyfile(png_path,
                             os.path.join(series_dst,
                                          os.path.basename(png_path)))
def organize_dataset(input_folder, info_csv, map_txt, output_folder):
    '''
    Separate the original DICOM folder into subfolders that are consistent
    with the redcap info. This function should be run after the data have
    been checked.
    Zhe Zhu 2019/12/10

    For each parsed entry a folder ``<output_folder>/LRML_XXXX/<series string>``
    is created and filled according to the entry's ``type``:
    ``"number"`` copies every ``*.dcm`` of the same-named source folder;
    ``"slice"`` copies ``NNNN.dcm`` for ``slice_begin``..``slice_end``
    inclusive; ``"odd"`` / ``"even"`` copy the ``*.dcm`` files whose last
    digit before the extension is odd / even. For the last three types the
    source folder is named by the text before ``'('`` in the series string.

    :param input_folder: folder holding the source ``LRML_XXXX`` folders
    :param info_csv: reader CSV passed to ``parse_reader_csv.parse``
    :param map_txt: text file with ``series_name label`` rows
    :param output_folder: folder receiving the organized series
    :return: None
    '''
    dt = {'names': ('series_name', 'label'), 'formats': ('S20', 'i2')}
    name_label_list = np.loadtxt(map_txt, dtype=dt)

    name_label_dict = {}
    for record in name_label_list:
        series_name = record[0]
        # 'S20' fields are loaded as bytes; decode them so the text lookups
        # below (and in the parser) find the keys, as load_reading does.
        if isinstance(series_name, bytes):
            series_name = series_name.decode("utf-8")
        name_label_dict[series_name] = record[1]

    # Alternative spellings that share the label of an existing name.
    name_label_dict['dwi_t2'] = name_label_dict['dwi_and_t2']
    name_label_dict['t2'] = name_label_dict['dwi_and_t2']
    name_label_dict['hepatocyte'] = name_label_dict['hepa_trans']
    name_label_dict['transitional'] = name_label_dict['hepa_trans']

    series_info = parse_reader_csv.parse(info_csv, name_label_dict)
    for count, info in enumerate(series_info, 1):
        # print() call form: the statement form is a SyntaxError on Python 3.
        print(count)

        patient_dir_name = 'LRML_{:04d}'.format(int(info['patient_id']))
        patient_folder_source = os.path.join(input_folder, patient_dir_name)
        patient_folder_target = os.path.join(output_folder, patient_dir_name)
        if not os.path.exists(patient_folder_target):
            os.makedirs(patient_folder_target)

        scan_subfolder = info['str'].strip()
        scan_folder_target = os.path.join(patient_folder_target,
                                          scan_subfolder)
        if not os.path.exists(scan_folder_target):
            os.makedirs(scan_folder_target)

        scan_type = info['type']
        if scan_type == "number":
            # Directly copy
            scan_folder_source = os.path.join(patient_folder_source,
                                              scan_subfolder)
            for dicom_file in glob.glob(scan_folder_source + '/*.dcm'):
                copyfile(dicom_file,
                         os.path.join(scan_folder_target,
                                      os.path.basename(dicom_file)))
        elif scan_type == "slice":
            # The source folder is named by the text before '('.
            scan_subfolder = scan_subfolder[:scan_subfolder.find('(')].strip()
            scan_folder_source = os.path.join(patient_folder_source,
                                              scan_subfolder)
            slice_begin = int(info['slice_begin'])
            slice_end = int(info['slice_end'])
            for idx in range(slice_begin, slice_end + 1):
                file_name = '{:04d}.dcm'.format(idx)
                copyfile(os.path.join(scan_folder_source, file_name),
                         os.path.join(scan_folder_target, file_name))
        elif scan_type in ("odd", "even"):
            wanted_remainder = 1 if scan_type == "odd" else 0
            scan_subfolder = scan_subfolder[:scan_subfolder.find('(')].strip()
            scan_folder_source = os.path.join(patient_folder_source,
                                              scan_subfolder)
            for dicom_file in glob.glob(scan_folder_source + '/*.dcm'):
                # Character -5 is the last digit before the '.dcm' extension.
                if int(dicom_file[-5]) % 2 == wanted_remainder:
                    copyfile(dicom_file,
                             os.path.join(scan_folder_target,
                                          os.path.basename(dicom_file)))