def process_data():
    """Build the CH4 segmentation dataset and write it to HDF5.

    Reads paired image/contour JPEGs from ``c.data_manual``'s
    ``manual_contours_ch4`` tree, resizes both to 256x256, binarizes the
    contour into a label, shuffles, and writes ``ch4_256.hdf5`` into
    ``c.data_intermediate`` (overwriting any existing file).
    """
    img_size = 256
    contour_path = os.path.join(c.data_manual, 'manual_contours_ch4', 'contours')
    image_path = os.path.join(c.data_manual, 'manual_contours_ch4', 'images')
    all_data = []
    for fn in [f for f in os.listdir(contour_path) if 'jpg' in f]:
        # Skip contour files that have no matching image.
        if not os.path.exists(os.path.join(image_path, fn)):
            continue
        img = cv2.imread(os.path.join(image_path, fn), 0)
        img = cv2.resize(img, (img_size, img_size)).reshape(1, 1, img_size, img_size)
        label = cv2.imread(os.path.join(contour_path, fn), 0)
        label = cv2.resize(label, (img_size, img_size))
        # Inverse-binarize so the contour region maps to 255, then scale to {0, 1}.
        _, label = cv2.threshold(label, 127, 255, cv2.THRESH_BINARY_INV)
        label = label.reshape(1, 1, img_size, img_size) / 255
        all_data.append([img, label])
    np.random.shuffle(all_data)
    all_imgs = np.concatenate([a[0] for a in all_data], axis=0)
    all_labels = np.concatenate([a[1] for a in all_data], axis=0)
    destpath = os.path.join(c.data_intermediate, 'ch4_{}.hdf5'.format(img_size))
    # Overwrite any previous dataset file.
    if os.path.exists(destpath):
        os.remove(destpath)
    u.save_hd5py({'images': all_imgs, 'labels': all_labels}, destpath, 5)
def main(dest='', filetype='hdf5', frame_size=64, seq_len=128, seqs=5,
         ball_width=7, balls_per_image=6):
    """Generate bouncing-ball frame sequences and save them to *dest*.

    The output format is chosen by *filetype*: 'hdf5' (via u.save_hd5py,
    chunk size 10) or 'npz' (via np.savez). Any other value saves nothing.
    """
    frames = generate_frames(
        shape=(frame_size, frame_size),
        seq_len=seq_len,
        seqs=seqs,
        ball_width=ball_width,
        balls_per_image=balls_per_image,
    )
    if filetype == 'hdf5':
        u.save_hd5py({'images': frames}, dest, 10)
    elif filetype == 'npz':
        np.savez(dest, frames)
def process_data_hdf5():
    """Assemble the SCD segmentation training set and write it to HDF5.

    Combines three sources:
      1. Officially contoured studies from the train/val/online splits.
      2. Manually segmented images under ``c.data_manual/manual_contours``
         (labels scaled into {0, 1}).
      3. Images listed in ``nocontour_tencia.csv`` that are known to contain
         no contour; these get an all-zero label.

    Relies on module-level ``img_size`` and the ``*_contour_path`` /
    ``*_img_path`` globals. Shuffles everything and writes
    ``scd_seg_{img_size}.hdf5`` into ``c.data_intermediate``.
    """
    contour_paths = [tr_contour_path, val_contour_path, onl_contour_path]
    image_paths = [tr_img_path, val_img_path, onl_img_path]
    all_data = []
    for contour_path, img_path in zip(contour_paths, image_paths):
        train_ctrs = get_all_contours(contour_path)
        imgs, labels = process_contours(train_ctrs, img_path)
        all_data.append([imgs, labels])
    # Add in manually segmented images; labels are in {0, 1}.
    aug_contour_path = os.path.join(c.data_manual, 'manual_contours', 'contours')
    aug_image_path = os.path.join(c.data_manual, 'manual_contours', 'images')
    for cfn in [fn_ for fn_ in os.listdir(aug_contour_path) if 'jpg' in fn_]:
        # Skip auto-generated contours; only human-drawn ones are trusted here.
        if 'auto' in cfn:
            continue
        dcm_img = cv2.imread(os.path.join(aug_image_path, cfn), 0)
        dcm_img = cv2.resize(dcm_img, (img_size, img_size)).reshape(
            1, 1, img_size, img_size)
        contour_img = cv2.imread(os.path.join(aug_contour_path, cfn), 0)
        contour_img = cv2.resize(contour_img, (img_size, img_size))
        # Inverse-binarize so the contour region maps to 255, then scale to {0, 1}.
        _, contour_img = cv2.threshold(contour_img, 127, 255,
                                       cv2.THRESH_BINARY_INV)
        contour_img = contour_img.reshape(1, 1, img_size, img_size) / 255
        all_data.append([dcm_img, contour_img])
    # Add in images known to have no contour, paired with an all-zero label.
    with open(os.path.join(c.data_manual, 'nocontour_tencia.csv')) as f:
        # A single shared zero array is safe: it is only ever read.
        label = np.zeros((1, 1, img_size, img_size), dtype=np.uint8)
        for l in f:
            # Row format: case, s0, t0, s1, t1, ... (alternating indices).
            row = [int(x) for x in l.split(',')]
            case = row[0]
            s = row[1::2]
            t = row[2::2]
            # Explicit error instead of assert: asserts vanish under `python -O`.
            if len(s) != len(t):
                raise ValueError(
                    'malformed nocontour row for case {}: unpaired indices'
                    .format(case))
            dset = du.CNN_Dataset(case, img_size=img_size)
            print("add case {} no contour imgs".format(case))
            for si, ti in zip(s, t):
                img = dset.images[si, ti, 0].reshape(1, 1, img_size, img_size)
                all_data.append([img, label])
    np.random.shuffle(all_data)
    all_imgs = np.concatenate([a[0] for a in all_data], axis=0)
    all_labels = np.concatenate([a[1] for a in all_data], axis=0)
    fn = os.path.join(c.data_intermediate, 'scd_seg_{}.hdf5'.format(img_size))
    # Overwrite any previous dataset file.
    if os.path.exists(fn):
        os.remove(fn)
    u.save_hd5py({'images': all_imgs, 'labels': all_labels}, fn, 5)
def process_data_hdf5():
    """Assemble the SCD segmentation training set and write it to HDF5.

    Combines officially contoured studies (train/val/online), manually
    segmented images from ``c.data_manual/manual_contours`` (labels in
    {0, 1}), and no-contour images listed in ``nocontour_tencia.csv``
    (all-zero labels). Uses module-level ``img_size`` and the
    ``*_contour_path`` / ``*_img_path`` globals; writes
    ``scd_seg_{img_size}.hdf5`` into ``c.data_intermediate``.
    """
    contour_paths = [tr_contour_path, val_contour_path, onl_contour_path]
    image_paths = [tr_img_path, val_img_path, onl_img_path]
    all_data = []
    for contour_path, img_path in zip(contour_paths, image_paths):
        train_ctrs = get_all_contours(contour_path)
        imgs, labels = process_contours(train_ctrs, img_path)
        all_data.append([imgs, labels])
    # Add in manually segmented images; labels are in {0, 1}.
    aug_contour_path = os.path.join(c.data_manual, 'manual_contours', 'contours')
    aug_image_path = os.path.join(c.data_manual, 'manual_contours', 'images')
    for cfn in [fn_ for fn_ in os.listdir(aug_contour_path) if 'jpg' in fn_]:
        # Skip auto-generated contours.
        if 'auto' in cfn:
            continue
        dcm_img = cv2.imread(os.path.join(aug_image_path, cfn), 0)
        dcm_img = cv2.resize(dcm_img, (img_size, img_size)).reshape(
            1, 1, img_size, img_size)
        contour_img = cv2.imread(os.path.join(aug_contour_path, cfn), 0)
        contour_img = cv2.resize(contour_img, (img_size, img_size))
        # Inverse-binarize so the contour region maps to 255, then scale to {0, 1}.
        _, contour_img = cv2.threshold(contour_img, 127, 255,
                                       cv2.THRESH_BINARY_INV)
        contour_img = contour_img.reshape(1, 1, img_size, img_size) / 255
        all_data.append([dcm_img, contour_img])
    # Add in images known to have no contour, paired with an all-zero label.
    with open(os.path.join(c.data_manual, 'nocontour_tencia.csv')) as f:
        # One shared zero array is safe here: it is only ever read.
        label = np.zeros((1, 1, img_size, img_size), dtype=np.uint8)
        for l in f:
            # Row format: case, s0, t0, s1, t1, ... (alternating indices).
            row = [int(x) for x in l.split(',')]
            case = row[0]
            s = row[1::2]
            t = row[2::2]
            assert len(s) == len(t)
            dset = du.CNN_Dataset(case, img_size=img_size)
            n = len(s)
            print("add case {} no contour imgs".format(case))
            for i in range(n):
                img = dset.images[s[i], t[i], 0].reshape(1, 1, img_size, img_size)
                all_data.append([img, label])
    np.random.shuffle(all_data)
    all_imgs = np.concatenate([a[0] for a in all_data], axis=0)
    all_labels = np.concatenate([a[1] for a in all_data], axis=0)
    fn = os.path.join(c.data_intermediate, 'scd_seg_{}.hdf5'.format(img_size))
    # Overwrite any previous dataset file.
    if os.path.exists(fn):
        os.remove(fn)
    u.save_hd5py({'images': all_imgs, 'labels': all_labels}, fn, 5)
def process_data():
    """Create the 256x256 CH4 segmentation HDF5 dataset from manual contours.

    Pairs each contour JPEG with its image, binarizes the contour into a
    label, shuffles, and writes ch4_256.hdf5 into c.data_intermediate,
    replacing any existing file.
    """
    img_size = 256
    base = os.path.join(c.data_manual, 'manual_contours_ch4')
    contour_path = os.path.join(base, 'contours')
    image_path = os.path.join(base, 'images')
    all_data = []
    for name in os.listdir(contour_path):
        if 'jpg' not in name:
            continue
        img_file = os.path.join(image_path, name)
        # Only keep contours whose matching image exists.
        if not os.path.exists(img_file):
            continue
        frame = cv2.imread(img_file, 0)
        frame = cv2.resize(frame, (img_size, img_size))
        frame = frame.reshape(1, 1, img_size, img_size)
        mask = cv2.imread(os.path.join(contour_path, name), 0)
        mask = cv2.resize(mask, (img_size, img_size))
        # Inverted threshold: contour region -> 255; then scale to {0, 1}.
        _, mask = cv2.threshold(mask, 127, 255, cv2.THRESH_BINARY_INV)
        mask = mask.reshape(1, 1, img_size, img_size) / 255
        all_data.append([frame, mask])
    np.random.shuffle(all_data)
    all_imgs = np.concatenate([pair[0] for pair in all_data], axis=0)
    all_labels = np.concatenate([pair[1] for pair in all_data], axis=0)
    n = all_imgs.shape[0]
    destpath = os.path.join(c.data_intermediate, 'ch4_{}.hdf5'.format(img_size))
    if os.path.exists(destpath):
        os.remove(destpath)
    u.save_hd5py({'images': all_imgs, 'labels': all_labels}, destpath, 5)
# Build the vocabulary from all caption word lists and persist it.
caption_words = list(set([word for caption_list in captions for word in caption_list]))
with open(c.words_used_file, 'w') as wr:
    wr.writelines('{}\n'.format(word) for word in caption_words)
# Index 0 is reserved for the end-of-caption token '<e>'.
words_to_idx = dict((w, i + 1) for i, w in enumerate(caption_words))
idx_to_words = dict((i + 1, w) for i, w in enumerate(caption_words))
idx_to_words[0] = '<e>'
# NOTE(review): `captions` is rebound here to {label: caption string} parsed
# from `lines`; the earlier list-of-word-lists form is discarded.
captions = dict([(int(line.split(',')[0]), line.strip().split(',')[1])
                 for line in lines])
# create dataset to save as hdf5
from PIL import Image
import os
import utils as u
idx = 0
caption_matrix = np.zeros((ntotal, c.max_caption_len), dtype=np.uint16)
img_matrix = np.empty((ntotal, 3, c.img_size, c.img_size), dtype=np.uint8)
for label in captions:
    # Encode the caption as word indices; the remainder stays zero ('<e>').
    vector = np.asarray([words_to_idx[w] for w in captions[label].split(' ')],
                        dtype=np.uint16)
    caption_matrix[idx, 0:vector.size] = vector
    im = Image.open(os.path.join(c.images_dir, '{}.jpg'.format(label)))
    img_matrix[idx] = u.arr_from_img_storage(im)
    idx += 1
    if idx % 400 == 0:
        # print() form works on both Python 2 and 3, matching the rest of the file.
        print('loaded img {}'.format(idx))
# 95/5 train/test split over the first ntrain vs remaining rows.
ntrain = int(ntotal * .95)
indices_dict = {'train': (0, ntrain), 'test': (ntrain, ntotal)}
u.save_hd5py({'images': img_matrix, 'captions': caption_matrix},
             c.twimg_hdf5_file, indices_dict)
# Index 0 is reserved for the end-of-caption token '<e>'.
idx_to_words = dict((i + 1, w) for i, w in enumerate(caption_words))
idx_to_words[0] = '<e>'
# Map image label -> caption string, parsed from "label,caption" lines.
captions = dict([(int(line.split(',')[0]), line.strip().split(',')[1])
                 for line in lines])
# create dataset to save as hdf5
from PIL import Image
import os
import utils as u
idx = 0
caption_matrix = np.zeros((ntotal, c.max_caption_len), dtype=np.uint16)
img_matrix = np.empty((ntotal, 3, c.img_size, c.img_size), dtype=np.uint8)
for label in captions:
    # Encode the caption as word indices; the remainder stays zero ('<e>').
    vector = np.asarray([words_to_idx[w] for w in captions[label].split(' ')],
                        dtype=np.uint16)
    caption_matrix[idx, 0:vector.size] = vector
    im = Image.open(os.path.join(c.images_dir, '{}.jpg'.format(label)))
    img_matrix[idx] = u.arr_from_img_storage(im)
    idx += 1
    if idx % 400 == 0:
        # print() form works on both Python 2 and 3, matching the rest of the file.
        print('loaded img {}'.format(idx))
# 95/5 train/test split over the first ntrain vs remaining rows.
ntrain = int(ntotal * .95)
indices_dict = {'train': (0, ntrain), 'test': (ntrain, ntotal)}
u.save_hd5py({
    'images': img_matrix,
    'captions': caption_matrix
}, c.twimg_hdf5_file, indices_dict)