Example #1
0
def process_data():
    """Build the ``manual_contours_ch4`` dataset and save it as HDF5.

    Every file in ``<c.data_manual>/manual_contours_ch4/contours`` whose name
    contains ``'jpg'`` is paired with the same-named file in
    ``<c.data_manual>/manual_contours_ch4/images``; contour files without a
    matching image are skipped.  Image and contour are both read as grayscale
    and resized to 256x256.  The contour is inverse-thresholded at 127 and
    divided by 255, so the label is 1 where the contour image was dark and 0
    elsewhere.

    The pairs are shuffled with ``np.random.shuffle`` and written to
    ``<c.data_intermediate>/ch4_256.hdf5``, replacing any existing file.

    Raises:
        ValueError: if no contour/image pair was found.
    """
    img_size = 256
    sample_shape = (1, 1, img_size, img_size)
    base_dir = os.path.join(c.data_manual, 'manual_contours_ch4')
    contour_path = os.path.join(base_dir, 'contours')
    image_path = os.path.join(base_dir, 'images')

    all_data = []
    for fn in os.listdir(contour_path):
        if 'jpg' not in fn:
            continue
        img_file = os.path.join(image_path, fn)
        if not os.path.exists(img_file):
            # Contour without a source image: nothing to pair it with.
            continue
        img = cv2.imread(img_file, 0)
        img = cv2.resize(img, (img_size, img_size)).reshape(sample_shape)
        label = cv2.imread(os.path.join(contour_path, fn), 0)
        label = cv2.resize(label, (img_size, img_size))
        _, label = cv2.threshold(label, 127, 255, cv2.THRESH_BINARY_INV)
        # NOTE(review): on Python 2 this is integer division of a uint8 array;
        # on Python 3 it produces floats -- confirm which dtype the consumer
        # of the HDF5 file expects.
        label = label.reshape(sample_shape) / 255
        all_data.append([img, label])

    if not all_data:
        # np.concatenate would raise an unhelpful ValueError on an empty list.
        raise ValueError(
            'no contour/image pairs found under {}'.format(base_dir))

    # Every entry holds exactly one sample, so shuffling the list shuffles
    # the samples.
    np.random.shuffle(all_data)
    all_imgs = np.concatenate([a[0] for a in all_data], axis=0)
    all_labels = np.concatenate([a[1] for a in all_data], axis=0)

    destpath = os.path.join(c.data_intermediate,
                            'ch4_{}.hdf5'.format(img_size))
    if os.path.exists(destpath):
        os.remove(destpath)
    u.save_hd5py({'images': all_imgs, 'labels': all_labels}, destpath, 5)
def main(dest='', filetype='hdf5', frame_size=64, seq_len=128, seqs=5, ball_width=7,
        balls_per_image=6):
    """Generate frames with ``generate_frames`` and save them to *dest*.

    Args:
        dest: output path handed to the writer.
        filetype: ``'hdf5'`` (written with ``u.save_hd5py``) or ``'npz'``
            (written with ``np.savez``).
        frame_size: frames are requested with shape
            ``(frame_size, frame_size)``.
        seq_len, seqs, ball_width, balls_per_image: forwarded unchanged to
            ``generate_frames``.

    Raises:
        ValueError: if *filetype* is neither ``'hdf5'`` nor ``'npz'``.
    """
    # Reject unknown formats before doing the generation work; previously the
    # frames were generated and then silently discarded.
    if filetype not in ('hdf5', 'npz'):
        raise ValueError(
            "unsupported filetype {!r}; expected 'hdf5' or 'npz'".format(
                filetype))

    dat = generate_frames(shape=(frame_size, frame_size), seq_len=seq_len,
                          seqs=seqs, ball_width=ball_width,
                          balls_per_image=balls_per_image)
    if filetype == 'hdf5':
        u.save_hd5py({'images': dat}, dest, 10)
    else:
        np.savez(dest, dat)
Example #3
0
def process_data_hdf5():
    """Assemble the segmentation dataset and save it as HDF5.

    Samples come from three sources:

    1. ``process_contours`` output for each of the ``tr_``, ``val_`` and
       ``onl_`` contour/image directory pairs.
    2. Manually segmented JPEG pairs under
       ``<c.data_manual>/manual_contours`` (files whose name contains
       ``'auto'`` are skipped); these labels are in {0, 1}.
    3. Frames listed in ``<c.data_manual>/nocontour_tencia.csv``, each paired
       with an all-zero label.

    Everything is concatenated along axis 0, shuffled per sample, and written
    to ``<c.data_intermediate>/scd_seg_<img_size>.hdf5``, replacing any
    existing file.

    ``img_size`` and the ``*_contour_path`` / ``*_img_path`` names are read
    from module scope.

    Raises:
        ValueError: if a CSV row is not a case id followed by complete index
            pairs.
    """
    contour_paths = [tr_contour_path, val_contour_path, onl_contour_path]
    image_paths = [tr_img_path, val_img_path, onl_img_path]
    sample_shape = (1, 1, img_size, img_size)
    all_data = []

    for contour_path, img_path in zip(contour_paths, image_paths):
        train_ctrs = get_all_contours(contour_path)
        imgs, labels = process_contours(train_ctrs, img_path)
        all_data.append([imgs, labels])

    # add in manually segmented images, labels in {0,1}
    manual_dir = os.path.join(c.data_manual, 'manual_contours')
    aug_contour_path = os.path.join(manual_dir, 'contours')
    aug_image_path = os.path.join(manual_dir, 'images')
    for cfn in os.listdir(aug_contour_path):
        if 'jpg' not in cfn or 'auto' in cfn:
            continue
        dcm_img = cv2.imread(os.path.join(aug_image_path, cfn), 0)
        dcm_img = cv2.resize(dcm_img,
                             (img_size, img_size)).reshape(sample_shape)
        contour_img = cv2.imread(os.path.join(aug_contour_path, cfn), 0)
        contour_img = cv2.resize(contour_img, (img_size, img_size))
        _, contour_img = cv2.threshold(contour_img, 127, 255,
                                       cv2.THRESH_BINARY_INV)
        contour_img = contour_img.reshape(sample_shape) / 255
        all_data.append([dcm_img, contour_img])

    # add in no contour images: every listed frame gets an all-zero label
    empty_label = np.zeros(sample_shape, dtype=np.uint8)
    with open(os.path.join(c.data_manual, 'nocontour_tencia.csv')) as f:
        for line in f:
            if not line.strip():
                # Tolerate blank lines instead of failing in int('').
                continue
            row = [int(x) for x in line.split(',')]
            case = row[0]
            # After the case id the row alternates the two indices used to
            # address dset.images below.
            s_indices = row[1::2]
            t_indices = row[2::2]
            if len(s_indices) != len(t_indices):
                # Explicit check: an assert would vanish under ``python -O``.
                raise ValueError(
                    'case {}: expected index pairs, got {} values'.format(
                        case, len(row) - 1))
            dset = du.CNN_Dataset(case, img_size=img_size)
            print("add case {} no contour imgs".format(case))
            for s_i, t_i in zip(s_indices, t_indices):
                img = dset.images[s_i, t_i, 0].reshape(sample_shape)
                all_data.append([img, empty_label])

    all_imgs = np.concatenate([a[0] for a in all_data], axis=0)
    all_labels = np.concatenate([a[1] for a in all_data], axis=0)

    # Shuffle per sample.  Shuffling the ``all_data`` list would only reorder
    # whole chunks, leaving each process_contours batch contiguous and in its
    # original order.
    order = np.random.permutation(all_imgs.shape[0])
    all_imgs = all_imgs[order]
    all_labels = all_labels[order]

    fn = os.path.join(c.data_intermediate, 'scd_seg_{}.hdf5'.format(img_size))
    if os.path.exists(fn):
        os.remove(fn)
    u.save_hd5py({'images': all_imgs, 'labels': all_labels}, fn, 5)
def main(dest='',
         filetype='hdf5',
         frame_size=64,
         seq_len=128,
         seqs=5,
         ball_width=7,
         balls_per_image=6):
    """Generate frames with ``generate_frames`` and save them to *dest*.

    Args:
        dest: output path handed to the writer.
        filetype: ``'hdf5'`` (written with ``u.save_hd5py``) or ``'npz'``
            (written with ``np.savez``).
        frame_size: frames are requested with shape
            ``(frame_size, frame_size)``.
        seq_len, seqs, ball_width, balls_per_image: forwarded unchanged to
            ``generate_frames``.

    Raises:
        ValueError: if *filetype* is neither ``'hdf5'`` nor ``'npz'``.
    """
    # Fail fast on an unknown format; previously the frames were generated
    # and then silently thrown away.
    if filetype not in ('hdf5', 'npz'):
        raise ValueError(
            "unsupported filetype {!r}; expected 'hdf5' or 'npz'".format(
                filetype))

    dat = generate_frames(shape=(frame_size, frame_size),
                          seq_len=seq_len,
                          seqs=seqs,
                          ball_width=ball_width,
                          balls_per_image=balls_per_image)
    if filetype == 'hdf5':
        u.save_hd5py({'images': dat}, dest, 10)
    else:
        np.savez(dest, dat)
Example #5
0
def process_data_hdf5():
    """Assemble the segmentation dataset and save it as HDF5.

    Samples come from three sources:

    1. ``process_contours`` output for each of the ``tr_``, ``val_`` and
       ``onl_`` contour/image directory pairs.
    2. Manually segmented JPEG pairs under
       ``<c.data_manual>/manual_contours`` (files whose name contains
       ``'auto'`` are skipped); these labels are in {0, 1}.
    3. Frames listed in ``<c.data_manual>/nocontour_tencia.csv``, each paired
       with an all-zero label.

    Everything is concatenated along axis 0, shuffled per sample, and written
    to ``<c.data_intermediate>/scd_seg_<img_size>.hdf5``, replacing any
    existing file.

    ``img_size`` and the ``*_contour_path`` / ``*_img_path`` names are read
    from module scope.

    Raises:
        ValueError: if a CSV row is not a case id followed by complete index
            pairs.
    """
    contour_paths = [tr_contour_path, val_contour_path, onl_contour_path]
    image_paths = [tr_img_path, val_img_path, onl_img_path]
    sample_shape = (1, 1, img_size, img_size)
    all_data = []

    for contour_path, img_path in zip(contour_paths, image_paths):
        train_ctrs = get_all_contours(contour_path)
        imgs, labels = process_contours(train_ctrs, img_path)
        all_data.append([imgs, labels])

    # add in manually segmented images, labels in {0,1}
    manual_dir = os.path.join(c.data_manual, 'manual_contours')
    aug_contour_path = os.path.join(manual_dir, 'contours')
    aug_image_path = os.path.join(manual_dir, 'images')
    for cfn in os.listdir(aug_contour_path):
        if 'jpg' not in cfn or 'auto' in cfn:
            continue
        dcm_img = cv2.imread(os.path.join(aug_image_path, cfn), 0)
        dcm_img = cv2.resize(dcm_img, (img_size, img_size)).reshape(sample_shape)
        contour_img = cv2.imread(os.path.join(aug_contour_path, cfn), 0)
        contour_img = cv2.resize(contour_img, (img_size, img_size))
        _, contour_img = cv2.threshold(contour_img, 127, 255, cv2.THRESH_BINARY_INV)
        contour_img = contour_img.reshape(sample_shape) / 255
        all_data.append([dcm_img, contour_img])

    # add in no contour images: every listed frame gets an all-zero label
    empty_label = np.zeros(sample_shape, dtype=np.uint8)
    with open(os.path.join(c.data_manual, 'nocontour_tencia.csv')) as f:
        for line in f:
            if not line.strip():
                # Tolerate blank lines instead of failing in int('').
                continue
            row = [int(x) for x in line.split(',')]
            case = row[0]
            # After the case id the row alternates the two indices used to
            # address dset.images below.
            s_indices = row[1::2]
            t_indices = row[2::2]
            if len(s_indices) != len(t_indices):
                # Explicit check: an assert would vanish under ``python -O``.
                raise ValueError(
                    'case {}: expected index pairs, got {} values'.format(
                        case, len(row) - 1))
            dset = du.CNN_Dataset(case, img_size=img_size)
            print("add case {} no contour imgs".format(case))
            for s_i, t_i in zip(s_indices, t_indices):
                img = dset.images[s_i, t_i, 0].reshape(sample_shape)
                all_data.append([img, empty_label])

    all_imgs = np.concatenate([a[0] for a in all_data], axis=0)
    all_labels = np.concatenate([a[1] for a in all_data], axis=0)

    # Shuffle per sample.  Shuffling the ``all_data`` list would only reorder
    # whole chunks, leaving each process_contours batch contiguous and in its
    # original order.
    order = np.random.permutation(all_imgs.shape[0])
    all_imgs = all_imgs[order]
    all_labels = all_labels[order]

    fn = os.path.join(c.data_intermediate, 'scd_seg_{}.hdf5'.format(img_size))
    if os.path.exists(fn):
        os.remove(fn)
    u.save_hd5py({'images': all_imgs, 'labels': all_labels}, fn, 5)
Example #6
0
def process_data():
    """Build the ``manual_contours_ch4`` dataset and save it as HDF5.

    Every file in ``<c.data_manual>/manual_contours_ch4/contours`` whose name
    contains ``'jpg'`` is paired with the same-named file in
    ``<c.data_manual>/manual_contours_ch4/images``; contour files without a
    matching image are skipped.  Image and contour are both read as grayscale
    and resized to 256x256.  The contour is inverse-thresholded at 127 and
    divided by 255, so the label is 1 where the contour image was dark and 0
    elsewhere.

    The pairs are shuffled with ``np.random.shuffle`` and written to
    ``<c.data_intermediate>/ch4_256.hdf5``, replacing any existing file.

    Raises:
        ValueError: if no contour/image pair was found.
    """
    img_size = 256
    sample_shape = (1, 1, img_size, img_size)
    base_dir = os.path.join(c.data_manual, 'manual_contours_ch4')
    contour_path = os.path.join(base_dir, 'contours')
    image_path = os.path.join(base_dir, 'images')

    all_data = []
    for fn in os.listdir(contour_path):
        if 'jpg' not in fn:
            continue
        img_file = os.path.join(image_path, fn)
        if not os.path.exists(img_file):
            # Contour without a source image: nothing to pair it with.
            continue
        img = cv2.imread(img_file, 0)
        img = cv2.resize(img, (img_size, img_size)).reshape(sample_shape)
        label = cv2.imread(os.path.join(contour_path, fn), 0)
        label = cv2.resize(label, (img_size, img_size))
        _, label = cv2.threshold(label, 127, 255, cv2.THRESH_BINARY_INV)
        # NOTE(review): on Python 2 this is integer division of a uint8 array;
        # on Python 3 it produces floats -- confirm which dtype the consumer
        # of the HDF5 file expects.
        label = label.reshape(sample_shape) / 255
        all_data.append([img, label])

    if not all_data:
        # np.concatenate would raise an unhelpful ValueError on an empty list.
        raise ValueError(
            'no contour/image pairs found under {}'.format(base_dir))

    # Every entry holds exactly one sample, so shuffling the list shuffles
    # the samples.
    np.random.shuffle(all_data)
    all_imgs = np.concatenate([a[0] for a in all_data], axis=0)
    all_labels = np.concatenate([a[1] for a in all_data], axis=0)

    destpath = os.path.join(c.data_intermediate, 'ch4_{}.hdf5'.format(img_size))
    if os.path.exists(destpath):
        os.remove(destpath)
    u.save_hd5py({'images': all_imgs, 'labels': all_labels}, destpath, 5)
Example #7
0
# Vocabulary: every distinct word appearing in any caption word-list.
caption_words = list({word for caption_list in captions for word in caption_list})

# Persist the vocabulary, one word per line, in the order used for indexing.
with open(c.words_used_file, 'w') as wr:
    for word in caption_words:
        wr.write('{}\n'.format(word))

# Words are numbered from 1; index 0 maps to '<e>' in the reverse table only.
words_to_idx = {w: i for i, w in enumerate(caption_words, 1)}
idx_to_words = dict(enumerate(caption_words, 1))
idx_to_words[0] = '<e>'

# Rebinds `captions` to {int id: caption text} parsed from `lines`.
# Only the text between the first and second comma is kept as the caption.
captions = dict(
    (int(line.split(',')[0]), line.strip().split(',')[1]) for line in lines)

# create dataset to save as hdf5
from PIL import Image
import os
import utils as u

# One row per caption: word indices (left-aligned; unused slots stay 0) and
# the matching image array.
caption_matrix = np.zeros((ntotal, c.max_caption_len), dtype=np.uint16)
# NOTE(review): np.empty leaves rows uninitialised -- this assumes `captions`
# holds exactly `ntotal` entries; confirm where `ntotal` is computed.
img_matrix = np.empty((ntotal, 3, c.img_size, c.img_size), dtype=np.uint8)

idx = 0  # number of captions processed so far; stays 0 if `captions` is empty
for idx, (label, caption) in enumerate(captions.items(), 1):
    row = idx - 1
    vector = np.asarray([words_to_idx[w] for w in caption.split(' ')],
                        dtype=np.uint16)
    caption_matrix[row, 0:vector.size] = vector
    # Images are looked up by caption id: <c.images_dir>/<label>.jpg
    im = Image.open(os.path.join(c.images_dir, '{}.jpg'.format(label)))
    img_matrix[row] = u.arr_from_img_storage(im)
    if idx % 400 == 0:
        # Parenthesised single argument prints identically on Python 2 and 3.
        print('loaded img {}'.format(idx))

# First 95% of the rows are the train range, the remainder the test range.
ntrain = int(ntotal * .95)
indices_dict = {'train': (0, ntrain), 'test': (ntrain, ntotal)}
u.save_hd5py({'images': img_matrix, 'captions': caption_matrix}, c.twimg_hdf5_file,
        indices_dict)
Example #8
0
# Reverse vocabulary table: words are numbered from 1, index 0 maps to '<e>'.
idx_to_words = dict(enumerate(caption_words, 1))
idx_to_words[0] = '<e>'

# {int id: caption text} parsed from `lines`.
# Only the text between the first and second comma is kept as the caption.
captions = dict(
    (int(line.split(',')[0]), line.strip().split(',')[1]) for line in lines)

# create dataset to save as hdf5
from PIL import Image
import os
import utils as u

# One row per caption: word indices (left-aligned; unused slots stay 0) and
# the matching image array.
caption_matrix = np.zeros((ntotal, c.max_caption_len), dtype=np.uint16)
# NOTE(review): np.empty leaves rows uninitialised -- this assumes `captions`
# holds exactly `ntotal` entries; confirm where `ntotal` is computed.
img_matrix = np.empty((ntotal, 3, c.img_size, c.img_size), dtype=np.uint8)

idx = 0  # number of captions processed so far; stays 0 if `captions` is empty
for idx, (label, caption) in enumerate(captions.items(), 1):
    row = idx - 1
    vector = np.asarray([words_to_idx[w] for w in caption.split(' ')],
                        dtype=np.uint16)
    caption_matrix[row, 0:vector.size] = vector
    # Images are looked up by caption id: <c.images_dir>/<label>.jpg
    im = Image.open(os.path.join(c.images_dir, '{}.jpg'.format(label)))
    img_matrix[row] = u.arr_from_img_storage(im)
    if idx % 400 == 0:
        # Parenthesised single argument prints identically on Python 2 and 3.
        print('loaded img {}'.format(idx))

# First 95% of the rows are the train range, the remainder the test range.
ntrain = int(ntotal * .95)
indices_dict = {'train': (0, ntrain), 'test': (ntrain, ntotal)}
u.save_hd5py({
    'images': img_matrix,
    'captions': caption_matrix
}, c.twimg_hdf5_file, indices_dict)