def get_data(raw_data_path, set_id):
    # Split the raw .npy array into 5 tfrecord shards of 2000 sequences each:
    # frames 0-9 form the input clip, frames 10-19 the prediction target.
    save_path = os.path.join(DATA_DIR, set_id)
    if os.path.isdir(save_path):
        pass
        #sys.exit('{} already exists'.format(save_path))
    else:
        os.makedirs(save_path)
    data = np.load(raw_data_path)
    for pf in range(0, 5):
        tfrecord_filename = os.path.join(save_path, PRE + str(pf) + '.tfrecord')
        tfrecord_writer = tf.python_io.TFRecordWriter(tfrecord_filename)
        for index in range(0, 2000):
            data_point = data[0:10, index + pf * 2000, :, :]
            data_point = data_point.astype(np.uint8)
            data_point = data_point.tostring()
            label = data[10:20, index + pf * 2000, :, :]
            label = label.astype(np.uint8)
            label = label.tostring()
            example = dataset_utils.image_to_tfexample_segmentation(data_point, label)
            tfrecord_writer.write(example.SerializeToString())
        tfrecord_writer.close()
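# Usage sketch for get_data (hypothetical path and set ID; the raw .npy layout is an
# assumption read off the slicing above: axis 0 holds 20 frames per sequence and
# axis 1 holds 10000 sequences, i.e. 5 shards x 2000):
#   get_data('raw/sequences.npy', 'seq_pred_v0')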
def get_data_mat_allinone(raw_data_path, set_id, subject_index):
    # Convert subjects stored in a single MATLAB/HDF5 file into one tfrecord per subject,
    # logging per-subject and whole-dataset slice counts per class.
    f = h5py.File(raw_data_path, 'r')
    save_path = os.path.join(DATA_DIR, set_id)
    if os.path.isdir(save_path):
        pass
        #sys.exit('{} already exists'.format(save_path))
    else:
        os.makedirs(save_path)
    log_f = open(os.path.join(DATA_DIR, set_id + '_info'), 'a')
    total = 0
    dataset_stats = {}
    for each_class in CLASS:
        dataset_stats.setdefault(each_class, 0)
    for pf in subject_index:
        tfrecord_filename = os.path.join(save_path, PRE + str(pf) + '.tfrecord')
        tfrecord_writer = tf.python_io.TFRecordWriter(tfrecord_filename)
        refinput = f[DATASET][NAME][pf]
        refoutput = f[DATASET][MASK][pf]
        num = f[refinput[0]].shape[0]
        print(num)
        total += num
        subject_stats = {}
        for each_class in CLASS:
            subject_stats.setdefault(each_class, 0)
        Labels = f[refoutput[0]][:]
        for ind in range(num):
            Input = f[refinput[0]][ind, :, :]
            # Masks are stored as 0/255; rescale to 0/1.
            Label = Labels[ind, :, :] / 255
            # Count slices with any foreground (class 1) vs. empty slices (class 0).
            if np.sum(Label) > 0:
                subject_stats[1] += 1
                dataset_stats[1] += 1
            else:
                subject_stats[0] += 1
                dataset_stats[0] += 1
            Label = Label.astype(np.int8)
            data_point = Input.tostring()
            label = Label.tostring()
            example = dataset_utils.image_to_tfexample_segmentation(
                data_point, label, subject_id=pf, index=ind)
            tfrecord_writer.write(example.SerializeToString())
        tfrecord_writer.close()
        print('Finish writing data from {}'.format(pf))
        log_f.write('{}: {}\t {}\n'.format(pf, num, json.dumps(subject_stats)))
    log_f.write('{}'.format(json.dumps(dataset_stats)))
    log_f.close()
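# Usage sketch (hypothetical file name and subject list; DATASET, NAME and MASK are the
# module-level HDF5 group/field constants assumed to be defined above this section):
#   get_data_mat_allinone('raw/all_subjects.mat', 'mat_allinone_v0', subject_index=[0, 1, 2])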
def get_data_LV2011(raw_data_path, set_id):
    # Convert the LV2011 HDF5 file into one tfrecord per subject; shard indices start at
    # 100, presumably to avoid clashing with shards written by the other converters here.
    f = h5py.File(raw_data_path, 'r')
    save_path = os.path.join(DATA_DIR, set_id)
    if os.path.isdir(save_path):
        pass
        #sys.exit('{} already exists'.format(save_path))
    else:
        os.makedirs(save_path)
    log_f = open(os.path.join(DATA_DIR, set_id + '_info'), 'a')
    total = 0
    subject_index = list(f['location'].keys())
    #subject_index = subject_index[57:77]
    for pid, pf in enumerate(subject_index):
        tfrecord_filename = os.path.join(save_path, PRE + str(100 + pid) + '.tfrecord')
        tfrecord_writer = tf.python_io.TFRecordWriter(tfrecord_filename)
        refinput = f['input/{}'.format(pf)][:]
        # Labels are binarised here; the per-slice threshold below is kept as a safeguard.
        refoutput = f['label/{}'.format(pf)][:].astype(np.bool).astype(np.int8)
        num = f['input/{}'.format(pf)].attrs['Num'].astype(np.int)[0]
        print(num)
        total += num
        for ind in range(num):
            Input = refinput[ind, :, :, 0]
            #Input = Input.astype(np.uint8)
            Label = refoutput[ind, :, :, :]
            Label = Label > 0.5
            # Alternative encoding from label channels 1 and 2, kept for reference:
            #w = Label[:, :, 1]
            #i = Label[:, :, 2]
            #Label = np.zeros(w.shape) + w + i * 2
            Label = Label.astype(np.int8)
            data_point = Input.tostring()
            label = Label.tostring()
            example = dataset_utils.image_to_tfexample_segmentation(
                data_point, label, subject_id=pid, index=ind)
            tfrecord_writer.write(example.SerializeToString())
        tfrecord_writer.close()
        print('Finish writing data from {}'.format(pid))
        log_f.write('{}_{}: {}\n'.format(pid, pf, num))
    log_f.write('In total: {}'.format(total))
    log_f.close()
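# Usage sketch (hypothetical path and set ID; the HDF5 layout with 'location',
# 'input/<subject>' and 'label/<subject>' groups plus a 'Num' attribute is taken
# from the reads above):
#   get_data_LV2011('raw/LV2011.h5', 'lv2011_v0')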
def get_data_pickle(raw_data_path, set_id, seq_length, stride_frame, stride_seq,
                    subject_index=()):
    # Slide a window over pickled image sequences: each example holds `seq_length` frames
    # sampled every `stride_frame` frames, and the label is the frame following the window.
    save_path = os.path.join(
        DATA_DIR, set_id + '_{}_{}_{}'.format(seq_length, stride_frame, stride_seq))
    if os.path.isdir(save_path):
        pass
        #sys.exit('{} already exists'.format(save_path))
    else:
        os.makedirs(save_path)
    log_f = open(
        os.path.join(
            DATA_DIR,
            set_id + '_{}_{}_{}'.format(seq_length, stride_frame, stride_seq) + '_info'),
        'a')
    for idx, pf in enumerate(subject_index):
        tfrecord_filename = os.path.join(save_path, PRE + str(idx) + '.tfrecord')
        tfrecord_writer = tf.python_io.TFRecordWriter(tfrecord_filename)
        total = 0
        files = glob.glob(os.path.join(raw_data_path, '*' + pf + '*'))
        for each_file in files:
            fname = each_file.split('/')[-1]
            with open(each_file, 'rb') as rf:
                [imgs, annots, eye, head, mouth] = pickle.load(rf)
            total += imgs.shape[0]
            for index in range(0, len(imgs) - (seq_length + 1) * stride_frame, stride_seq):
                data_point = imgs[index:index + seq_length * stride_frame:stride_frame, :, :]
                label = imgs[index + seq_length * stride_frame, :, :]
                data_point = data_point.astype(np.uint8)
                label = label.astype(np.uint8)
                data_point = data_point.tostring()
                label = label.tostring()
                example = dataset_utils.image_to_tfexample_segmentation(data_point, label)
                tfrecord_writer.write(example.SerializeToString())
            print('Finish extracting data from %s' % each_file)
        tfrecord_writer.close()
        print('Finish writing data from {}'.format(pf))
        log_f.write('{}: {}\n'.format(pf, total))
    log_f.close()
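# Usage sketch (hypothetical directory, subject tags, and window settings; the pickle is
# assumed, per the unpacking above, to hold [imgs, annots, eye, head, mouth]):
#   get_data_pickle('raw/pickles', 'faces_v0', seq_length=10, stride_frame=2,
#                   stride_seq=5, subject_index=['s01', 's02'])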
def get_data_Sunny(raw_data_path, set_id):
    # Convert the Sunny HDF5 export (train/val/test groups) into one tfrecord per subject.
    # Label channels are collapsed into a class map: background=0, wall=1, endo=2.
    f = h5py.File(raw_data_path, 'r')
    save_path = os.path.join(DATA_DIR, set_id)
    if os.path.isdir(save_path):
        pass
        #sys.exit('{} already exists'.format(save_path))
    else:
        os.makedirs(save_path)
    log_f = open(os.path.join(DATA_DIR, set_id + '_info'), 'a')
    total = 0
    for prex, dataset in enumerate(['train', 'val', 'test']):
        subject_index = list(f['{}/location'.format(dataset)].keys())
        for pid, pf in enumerate(subject_index):
            # Shard naming assumes at most 15 subjects per split.
            tfrecord_filename = os.path.join(
                save_path, PRE + str(pid + prex * 15) + '.tfrecord')
            tfrecord_writer = tf.python_io.TFRecordWriter(tfrecord_filename)
            refinput = f['{}/input/{}'.format(dataset, pf)][:]
            refoutput = f['{}/label/{}'.format(dataset, pf)][:].astype(
                np.bool).astype(np.int8)
            num = f['{}/input/{}'.format(dataset, pf)].attrs['Num'].astype(np.int)
            num = num[0]
            print(num)
            total += num
            for ind in range(num):
                Input = refinput[ind, :, :]
                Label = refoutput[ind, :, :, :]
                wall = Label[:, :, 1]
                endo = Label[:, :, 2]
                Label = np.zeros(wall.shape)
                Label = Label + wall + endo * 2
                Label = Label.astype(np.int8)
                data_point = Input.tostring()
                label = Label.tostring()
                example = dataset_utils.image_to_tfexample_segmentation(
                    data_point, label, subject_id=pid, index=ind)
                tfrecord_writer.write(example.SerializeToString())
            tfrecord_writer.close()
            print('Finish writing data from {}'.format(pid))
            log_f.write('{}_{}: {}\n'.format(pid, pf, num))
    log_f.write('In total: {}'.format(total))
    log_f.close()
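# Usage sketch (hypothetical path and set ID; the HDF5 layout with '<split>/location',
# '<split>/input/<subject>' and '<split>/label/<subject>' is taken from the reads above):
#   get_data_Sunny('raw/sunny.h5', 'sunny_v0')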
def read_ACDC(path, set_id):
    # Convert the ACDC challenge folder (one 'patientXXX' directory per subject) into
    # separate ED and ES tfrecords per subject; ES slice indices are offset by 100 so
    # they do not overlap with the ED indices.
    #path = '/Users/HemingY/Developer/LeftVentricle/'
    pids = os.listdir(path)
    pids = [x for x in pids if 'patient' in x]
    pids = sorted(pids, key=lambda x: int(x.split('patient')[-1]))
    save_path = os.path.join(DATA_DIR, set_id)
    if os.path.isdir(save_path):
        pass
        #sys.exit('{} already exists'.format(save_path))
    else:
        os.makedirs(save_path)
    log_f = open(os.path.join(DATA_DIR, set_id + '_info'), 'a')
    total_es = 0
    total_ed = 0
    for pf, pid in enumerate(pids):
        pidpath = os.path.join(path, pid)
        # Info.cfg has no section header, so prepend a dummy [global] section for configparser.
        cfg = configparser.ConfigParser()
        filename = os.path.join(pidpath, 'Info.cfg')
        with open(filename) as fp:
            cfg.read_file(itertools.chain(['[global]'], fp), source=filename)
        secs = dict(cfg.items('global'))
        # End-diastole (ED) frame
        ed = int(secs['ed'])
        name = '{0}_frame{1:02d}.nii.gz'.format(pid, ed)
        name_gt = '{0}_frame{1:02d}_gt.nii.gz'.format(pid, ed)
        data_ed, label_ed = read_imgs_labels(pidpath, name, name_gt)
        # End-systole (ES) frame
        es = int(secs['es'])
        name = '{0}_frame{1:02d}.nii.gz'.format(pid, es)
        name_gt = '{0}_frame{1:02d}_gt.nii.gz'.format(pid, es)
        data_es, label_es = read_imgs_labels(pidpath, name, name_gt)
        # Write tfrecords, normalising intensities to [0, 1] per subject.
        tfrecord_filename = os.path.join(save_path, str(pf) + '_es.tfrecord')
        tfrecord_writer = tf.python_io.TFRecordWriter(tfrecord_filename)
        data_es = data_es / np.max(data_es)
        for ind in range(label_es.shape[0]):
            Label = label_es[ind]
            Label = Label.astype(np.int8)
            Input = data_es[ind]
            data_point = Input.tostring()
            label = Label.tostring()
            example = dataset_utils.image_to_tfexample_segmentation(
                data_point, label, subject_id=pf, index=ind + 100)
            tfrecord_writer.write(example.SerializeToString())
        tfrecord_writer.close()
        tfrecord_filename = os.path.join(save_path, str(pf) + '_ed.tfrecord')
        tfrecord_writer = tf.python_io.TFRecordWriter(tfrecord_filename)
        data_ed = data_ed / np.max(data_ed)
        for ind in range(label_ed.shape[0]):
            Label = label_ed[ind]
            Label = Label.astype(np.int8)
            Input = data_ed[ind]
            data_point = Input.tostring()
            label = Label.tostring()
            example = dataset_utils.image_to_tfexample_segmentation(
                data_point, label, subject_id=pf, index=ind)
            tfrecord_writer.write(example.SerializeToString())
        tfrecord_writer.close()
        total_es += label_es.shape[0]
        total_ed += label_ed.shape[0]
        print('Finish writing data from {}'.format(pid))
        log_f.write('{}: es: {}; ed: {}\n'.format(
            pid, label_es.shape[0], label_ed.shape[0]))
    log_f.write('In total: es: {}; ed: {}'.format(total_es, total_ed))
    log_f.close()
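# Usage sketch (hypothetical data root and set ID; patient folders follow the
# 'patientXXX' layout assumed by the listing above, and read_imgs_labels is the
# loader helper referenced above):
#   read_ACDC('/data/ACDC/training', 'acdc_v0')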