예제 #1
0
def compare_readings(reading_gt_file, reading_pred_file):
    """Print how often a predicted reading agrees with the ground truth.

    Both files are parsed with ``parse.parse`` using the map returned by
    ``load_map_dict`` for the reference CSV. Entries are matched on the
    stripped ``patient_id`` concatenated with the stripped ``str`` field and
    their ``series_label`` values are compared.

    Ground-truth entries that have no counterpart in the prediction file are
    listed and counted as incorrect (previously they raised ``KeyError``).
    When the ground truth is empty the accuracy is reported as 0.0
    (previously this raised ``ZeroDivisionError``).

    :param reading_gt_file: path of the ground-truth reading file.
    :param reading_pred_file: path of the predicted reading file.
    :return: None; the summary is printed.
    """
    map_dict = load_map_dict(
        '/media/zzhu/Seagate Backup Plus Drive/data/Amber/LIRADSMachineLearnin_DATA_mustafa.csv',
        3, 29)
    reading_gt = parse.parse(reading_gt_file, map_dict)
    reading_pred = parse.parse(reading_pred_file, map_dict)
    gt_name = os.path.basename(reading_gt_file)
    pred_name = os.path.basename(reading_pred_file)
    print('Comparing {0} and {1}'.format(gt_name, pred_name))

    # Index both readings by patient id + series string; on duplicate keys
    # the last entry wins.
    reading_map_gt = {
        read['patient_id'].strip() + read['str'].strip(): read
        for read in reading_gt
    }
    reading_map_pred = {
        read['patient_id'].strip() + read['str'].strip(): read
        for read in reading_pred
    }

    count = 0
    correct = 0
    for key, gt_read in reading_map_gt.items():
        count += 1
        pred_read = reading_map_pred.get(key)
        if pred_read is None:
            # A series without a prediction cannot be correct.
            print('Missing in {0}: {1}'.format(pred_name, key))
            continue
        if gt_read['series_label'] == pred_read['series_label']:
            correct += 1

    # Guard the division so an empty ground truth still yields a summary.
    accuracy = float(correct) / float(count) if count else 0.0
    print('Total:{0} Correct:{1} Acc:{2}'.format(count, correct, accuracy))
예제 #2
0
def load_reading(reading_file):
    '''Load a reader CSV file into a ``{series_id: series_label}`` dict.

    The series-name -> label map is read from a fixed ``map_fatonly.txt``
    file and extended with a few aliases. Header columns 5..42 of
    ``reading_file`` that are not in the map are assigned the
    ``anythingelse`` label. The file is then parsed with
    ``parse_reader_csv.parse``.

    An empty file or a header with fewer than 43 columns no longer raises
    while the header is scanned; only the columns that exist are examined.

    :param reading_file: path of the reader CSV file.
    :return: dict mapping each parsed ``series_id`` to its ``series_label``.
    '''
    map_txt = '/home/zzhu/Data/data/ai_vs_radiologist/map_fatonly.txt'

    dt = {'names': ('series_name', 'label'), 'formats': ('S20', 'i2')}
    name_label_list = np.loadtxt(map_txt, dtype=dt)
    name_label_dict = {}
    for record in name_label_list:
        # Names are loaded as byte strings ('S20'); decode them for use as
        # text keys.
        name_label_dict[record[0].decode("utf-8")] = record[1]
    # Aliases that share the label of an existing map entry.
    name_label_dict['dwi_t2'] = name_label_dict['dwi_and_t2']
    name_label_dict['t2'] = name_label_dict['dwi_and_t2']
    name_label_dict['hepatocyte'] = name_label_dict['hepa_trans']
    name_label_dict['transitional'] = name_label_dict['hepa_trans']

    col_begin = 5
    col_end = 43
    # deal with anything else classes
    with open(reading_file, 'r') as csv_file:
        header = next(csv.reader(csv_file, delimiter=','), None)
    # Slicing tolerates a missing or short header, unlike indexing by range.
    for series_name in (header or [])[col_begin:col_end]:
        if series_name not in name_label_dict:
            name_label_dict[series_name] = name_label_dict['anythingelse']

    # load info_dict
    series_info = parse_reader_csv.parse(reading_file, name_label_dict)
    return {info['series_id']: info['series_label'] for info in series_info}
예제 #3
0
def compare_two_readings(reading1_file, reading2_file, map_dict):
    """Print a comparison of the series ids found in two reading files.

    Both files are parsed with ``parse.parse``. Repeated ``series_id``
    values are reported for each reading, followed by the number of distinct
    ids per reading and the ids that appear in only one of the two.

    :param reading1_file: path of the first reading file.
    :param reading2_file: path of the second reading file.
    :param map_dict: map handed through to ``parse.parse``.
    :return: None; everything is printed.
    """
    print('Comparing {0} and {1}'.format(reading1_file, reading2_file))
    reading1 = parse.parse(reading1_file, map_dict)
    reading2 = parse.parse(reading2_file, map_dict)

    def collect_ids(entries):
        # Gather the distinct series ids, reporting every repeated entry.
        seen = set()
        for entry in entries:
            series_id = entry['series_id']
            if series_id in seen:
                print('Repeated item:{0}'.format(entry))
            else:
                seen.add(series_id)
        return seen

    ids1 = collect_ids(reading1)
    ids2 = collect_ids(reading2)

    # Check repeated items first
    for entries, ids, warning in (
            (reading1, ids1, 'reading 1 may contain repeated items'),
            (reading2, ids2, 'reading 2 may contain repeated items')):
        if len(entries) != len(ids):
            print(warning)
            print('from list:{0}'.format(len(entries)))

    for path, ids in ((reading1_file, ids1), (reading2_file, ids2)):
        print('{0} reading contains {1} items'.format(
            os.path.basename(path), len(ids)))

    # Report the ids exclusive to each side, first reading first.
    for own_file, own_ids, other_file, other_ids in (
            (reading1_file, ids1, reading2_file, ids2),
            (reading2_file, ids2, reading1_file, ids1)):
        only_here = own_ids - other_ids
        if not only_here:
            continue
        print('In {0} but not in {1}:'.format(os.path.basename(own_file),
                                              os.path.basename(other_file)))
        for item in only_here:
            print(item)
예제 #4
0
def test_6():
    """Write the external-validation TFRecord, one 3D volume per series.

    The reference CSV is parsed with ``parse.parse`` and indexed by
    ``patient_id`` + stripped ``str``. Every series folder found under the
    PNG patient folders is looked up in that index (the patient id is taken
    from the last three characters of the patient folder name), its slice
    images are stacked along the third axis of a ``uint8`` volume, and one
    ``tf.train.Example`` with the features ``height``, ``width``, ``slice``,
    ``label``, ``series_str``, ``patient_id`` and ``volume_raw`` is written.

    Changes relative to the previous version:

    * Folder and slice listings are sorted. ``glob`` returns files in
      arbitrary order, so the slice order inside a volume was undefined.
      NOTE(review): sorting by name matches the slice index only if the
      file names are zero-padded; confirm against the data.
    * Empty series folders are reported and skipped instead of raising
      ``IndexError``; they are not counted in the total.
    * A slice is resized when either its height or its width differs from
      the first slice (previously only the height was checked).
    * ``ndarray.tobytes`` replaces the deprecated ``tostring`` alias.

    :return: None.
    :raises KeyError: if a series folder has no entry in the reference CSV.
    """
    # construct dataset. Each instance is a 3D volume.
    reference_file = '/media/zzhu/Seagate Backup Plus Drive/data/Amber/LIRADSMachineLearnin_DATA_mustafa.csv'
    patient_folder = '/media/zzhu/Seagate Backup Plus Drive/data/Amber/png_series'
    tfrecord_file = '/media/zzhu/Seagate Backup Plus Drive/data/Amber/tfrecord/external_validation.tfrecord'

    map_dict = load_map_dict(reference_file, 3, 29)
    reading = parse.parse(reference_file, map_dict)
    reading_map = {}
    for read in reading:
        reading_map[read['patient_id'] + read['str'].strip()] = read

    series_num = 0
    with tf.python_io.TFRecordWriter(tfrecord_file) as writer:
        for patient_dir in sorted(glob.glob(patient_folder + '/*')):
            # Kept in its own variable so the id read from the CSV below
            # cannot change the lookup key of the following series.
            folder_patient_id = os.path.basename(patient_dir)[-3:]
            for series_folder in sorted(glob.glob(patient_dir + '/*')):
                folder_name = os.path.basename(series_folder).strip()
                read = reading_map[folder_patient_id + folder_name]
                series_str = read['str']
                patient_id = read['patient_id']
                series_label = read['series_label']

                slice_file_list = sorted(glob.glob(series_folder + '/*'))
                if not slice_file_list:
                    # Nothing to stack: report the folder and move on.
                    print(series_folder)
                    continue
                series_num += 1
                if series_num % 50 == 0:
                    print(series_num)

                # The first slice fixes the in-plane size of the volume.
                anchor_slice = cv2.imread(slice_file_list[0], 0)
                slice_height, slice_width = anchor_slice.shape[:2]
                slice_depth = len(slice_file_list)
                volume = np.zeros((slice_height, slice_width, slice_depth),
                                  dtype=np.uint8)
                for slice_idx, slice_file in enumerate(slice_file_list):
                    slice_img = cv2.imread(slice_file, 0)
                    if slice_img.shape[:2] != (slice_height, slice_width):
                        print(slice_file_list[0])
                        # cv2.resize takes the target size as (width, height).
                        slice_img = cv2.resize(slice_img,
                                               (slice_width, slice_height))
                    volume[:, :, slice_idx] = slice_img

                volume_raw = volume.tobytes()
                example = tf.train.Example(features=tf.train.Features(
                    feature={
                        'height': _int64_feature(slice_height),
                        'width': _int64_feature(slice_width),
                        'slice': _int64_feature(slice_depth),
                        'label': _int64_feature(series_label),
                        'series_str': _bytes_feature(series_str),
                        'patient_id': _bytes_feature(patient_id),
                        'volume_raw': _bytes_feature(volume_raw)
                    }))
                writer.write(example.SerializeToString())
    print('In total {0} series'.format(series_num))
예제 #5
0
def separate_series(input_folder, output_folder):
    """Copy PNG slices into per-series folders that match the reference CSV.

    For every entry parsed from the reference CSV a target folder
    ``<output_folder>/LRML_XXXX/<str>`` is created and filled according to
    the entry's ``type``:

    * ``number``: every ``*.png`` of the equally named source folder.
    * ``slice``: files ``IM-0001-XXXX-0001.png`` for the inclusive index
      range ``slice_begin``..``slice_end``.
    * ``odd`` / ``even``: the ``*.png`` files whose 10th character from the
      end of the path is an odd / even digit (presumably the last digit of
      the slice index, given the naming above; confirm against the data).

    For ``slice``, ``odd`` and ``even`` the source folder is the part of the
    series string before the first ``'('``. If the string contains no
    ``'('`` the whole string is used; the previous ``find``-based slicing
    silently dropped its last character in that case. Entries of any other
    type only get their (empty) target folder.

    :param input_folder: root folder containing the ``LRML_XXXX`` sources.
    :param output_folder: root folder receiving the separated series.
    :return: None.
    """
    # use mustafa's file as reference
    reference_file = '/media/zzhu/Seagate Backup Plus Drive/data/Amber/LIRADSMachineLearnin_DATA_mustafa.csv'
    map_dict = load_map_dict(reference_file, 3, 29)
    reading = parse.parse(reference_file, map_dict)

    for count, info in enumerate(reading, 1):
        if count % 25 == 0:
            print(count)
        patient_name = 'LRML_{:04d}'.format(int(info['patient_id']))
        patient_folder_source = os.path.join(input_folder, patient_name)
        patient_folder_target = os.path.join(output_folder, patient_name)
        if not os.path.exists(patient_folder_target):
            os.makedirs(patient_folder_target)
        scan_subfolder = info['str'].strip()
        scan_folder_target = os.path.join(patient_folder_target,
                                          scan_subfolder)
        if not os.path.exists(scan_folder_target):
            os.makedirs(scan_folder_target)

        scan_type = info['type']
        if scan_type == "number":
            # Directly copy
            source_name = scan_subfolder
        elif scan_type in ("slice", "odd", "even"):
            # Drop the parenthesised suffix to get the source folder name.
            source_name = scan_subfolder.split('(', 1)[0].strip()
        else:
            continue
        scan_folder_source = os.path.join(patient_folder_source, source_name)

        if scan_type == "slice":
            slice_begin = int(info['slice_begin'])
            slice_end = int(info['slice_end'])
            for idx in range(slice_begin, slice_end + 1):
                file_name = 'IM-0001-{:04d}-0001.png'.format(idx)
                copyfile(os.path.join(scan_folder_source, file_name),
                         os.path.join(scan_folder_target, file_name))
            continue

        # None means "copy everything" (the "number" type).
        wanted_parity = {"odd": 1, "even": 0}.get(scan_type)
        for img_file in glob.glob(scan_folder_source + '/*.png'):
            if (wanted_parity is not None
                    and int(img_file[-10]) % 2 != wanted_parity):
                continue
            copyfile(img_file,
                     os.path.join(scan_folder_target,
                                  os.path.basename(img_file)))
예제 #6
0
def organize_dataset(input_folder, info_csv, map_txt, output_folder):
    '''
    Separate the original DICOM folder into subfolders that are consistent
    with the redcap info.
    This function should be run after the data have been checked.
    Zhe Zhu 2019/12/10

    For every entry parsed from ``info_csv`` a target folder
    ``<output_folder>/LRML_XXXX/<str>`` is created and filled according to
    the entry's ``type``: ``number`` copies every ``*.dcm`` file, ``slice``
    copies ``XXXX.dcm`` for the inclusive range ``slice_begin``..``slice_end``,
    and ``odd`` / ``even`` copy the ``*.dcm`` files whose last digit before
    the extension is odd / even. For ``slice``, ``odd`` and ``even`` the
    source folder is the part of the series string before the first ``'('``;
    if there is no ``'('`` the whole string is used (the previous
    ``find``-based slicing dropped its last character). Entries of any other
    type only get their (empty) target folder.

    The function now also runs on Python 3: the ``print`` statement became a
    call and byte-string names loaded from ``map_txt`` are decoded so that
    the text-key lookups below succeed.

    :param input_folder: root folder containing the ``LRML_XXXX`` sources.
    :param info_csv: CSV file handed to ``parse_reader_csv.parse``.
    :param map_txt: text file of ``series_name label`` pairs read with
        ``np.loadtxt``.
    :param output_folder: root folder receiving the organized series.
    :return: None.
    '''
    dt = {'names': ('series_name', 'label'), 'formats': ('S20', 'i2')}
    name_label_list = np.loadtxt(map_txt, dtype=dt)
    name_label_dict = {}
    for record in name_label_list:
        series_name = record[0]
        if not isinstance(series_name, str):
            # Python 3 yields bytes for the 'S20' field; keys must be text.
            series_name = series_name.decode("utf-8")
        name_label_dict[series_name] = record[1]
    # Aliases that share the label of an existing map entry.
    name_label_dict['dwi_t2'] = name_label_dict['dwi_and_t2']
    name_label_dict['t2'] = name_label_dict['dwi_and_t2']
    name_label_dict['hepatocyte'] = name_label_dict['hepa_trans']
    name_label_dict['transitional'] = name_label_dict['hepa_trans']

    series_info = parse_reader_csv.parse(info_csv, name_label_dict)
    for count, info in enumerate(series_info, 1):
        print(count)
        patient_name = 'LRML_{:04d}'.format(int(info['patient_id']))
        patient_folder_source = os.path.join(input_folder, patient_name)
        patient_folder_target = os.path.join(output_folder, patient_name)
        if not os.path.exists(patient_folder_target):
            os.makedirs(patient_folder_target)
        scan_subfolder = info['str'].strip()
        scan_folder_target = os.path.join(patient_folder_target,
                                          scan_subfolder)
        if not os.path.exists(scan_folder_target):
            os.makedirs(scan_folder_target)

        scan_type = info['type']
        if scan_type == "number":
            # Directly copy
            source_name = scan_subfolder
        elif scan_type in ("slice", "odd", "even"):
            # Drop the parenthesised suffix to get the source folder name.
            source_name = scan_subfolder.split('(', 1)[0].strip()
        else:
            continue
        scan_folder_source = os.path.join(patient_folder_source, source_name)

        if scan_type == "slice":
            slice_begin = int(info['slice_begin'])
            slice_end = int(info['slice_end'])
            for idx in range(slice_begin, slice_end + 1):
                file_name = '{:04d}.dcm'.format(idx)
                copyfile(os.path.join(scan_folder_source, file_name),
                         os.path.join(scan_folder_target, file_name))
            continue

        # None means "copy everything" (the "number" type).
        wanted_parity = {"odd": 1, "even": 0}.get(scan_type)
        for dicom_file in glob.glob(scan_folder_source + '/*.dcm'):
            if (wanted_parity is not None
                    and int(dicom_file[-5]) % 2 != wanted_parity):
                continue
            copyfile(dicom_file,
                     os.path.join(scan_folder_target,
                                  os.path.basename(dicom_file)))