Esempio n. 1
0
def video_load_crop(entry, input_size):
    """Load one video-matting training sample and build a random crop.

    Args:
        entry: tuple ``(fg_path, bg_path, previous_path, flo_path)`` of file
            paths — foreground (with alpha), background, previous-frame
            foreground, and optical-flow file.
        input_size: ``(width, height)`` passed to ``cv2.resize`` as ``dsize``.

    Returns:
        ``(cmp, bg, label, warped_alpha, fg)`` where ``cmp`` and ``bg`` are
        VGG-mean-subtracted images resized to ``input_size``, ``label`` is the
        alpha matte reshaped to ``(h, w, 1)``, and ``warped_alpha`` is the
        previous alpha warped by the flow (3-channel).
    """
    fg_path, bg_path, previous_path, flo_path = entry
    alpha, fg = reader.read_fg_img(fg_path)
    # NOTE: np.float was removed in NumPy 1.24; np.float64 is the same dtype.
    fg = fg.astype(dtype=np.float64)  # potentially very big
    bg = cv2.imread(bg_path).astype(dtype=np.float64)
    flo = reader.read_flow(flo_path)
    prev_alpha, _ = reader.read_fg_img(previous_path)
    warped_alpha = flow.warp_img(prev_alpha, flo)
    # replicate to 3 channels so it can be concatenated/split alongside fg
    warped_alpha = np.repeat(warped_alpha[:, :, np.newaxis], 3, axis=2)
    crop_type = [(320, 320), (480, 480),
                 (640, 640)]  # we crop images of different sizes
    crop_h, crop_w = crop_type[np.random.randint(0, len(crop_type))]
    fg_h, fg_w = fg.shape[:2]
    if fg_h < crop_h or fg_w < crop_w:
        # in that case the image is not too big, and we have to add padding
        alpha = alpha.reshape((alpha.shape[0], alpha.shape[1], 1))
        cat = np.concatenate((fg, alpha, warped_alpha), axis=2)
        cropped_cat = get_padded_img(cat, crop_h, crop_w)
        # channels: fg = [:3], alpha = [3:4], warped_alpha = [4:]
        fg, alpha, warped_alpha = np.split(cropped_cat,
                                           indices_or_sections=[3, 4],
                                           axis=2)
    # otherwise, the fg is likely to be HRes, we directly crop it and dismiss
    # the original image to avoid manipulating big images
    fg_h, fg_w = fg.shape[:2]
    # randomly picks top-left corner
    i, j = np.random.randint(0, fg_h - crop_h + 1), np.random.randint(
        0, fg_w - crop_w + 1)
    # BUG FIX: the horizontal slice previously used crop_h instead of crop_w.
    # Harmless while every crop type is square, but wrong for rectangular ones.
    fg = fg[i:i + crop_h, j:j + crop_w]
    alpha = alpha[i:i + crop_h, j:j + crop_w]
    warped_alpha = warped_alpha[i:i + crop_h, j:j + crop_w]
    # NOTE(review): fg has already been cropped to (crop_h, crop_w) here, so
    # this ratio evaluates to bg.shape — presumably the original fg size was
    # intended; kept as-is to preserve behavior. TODO confirm.
    bg_crop_h, bg_crop_w = int(np.ceil(crop_h * bg.shape[0] / fg.shape[0])), \
                           int(np.ceil(crop_w * bg.shape[1] / fg.shape[1]))
    padded_bg = get_padded_img(bg, bg_crop_h, bg_crop_w)
    i, j = np.random.randint(0,
                             bg.shape[0] - bg_crop_h + 1), np.random.randint(
                                 0, bg.shape[1] - bg_crop_w + 1)
    cropped_bg = padded_bg[i:i + bg_crop_h, j:j + bg_crop_w]
    bg = cv2.resize(src=cropped_bg,
                    dsize=input_size,
                    interpolation=cv2.INTER_LINEAR)
    fg = cv2.resize(fg, input_size, interpolation=cv2.INTER_LINEAR)
    alpha = cv2.resize(alpha, input_size, interpolation=cv2.INTER_LINEAR)
    warped_alpha = cv2.resize(warped_alpha,
                              input_size,
                              interpolation=cv2.INTER_LINEAR)

    # composite = fg blended over bg with alpha, then mean-centered for VGG
    cmp = reader.create_composite_image(fg, bg, alpha)
    cmp -= params.VGG_MEAN
    bg -= params.VGG_MEAN
    label = alpha.reshape((alpha.shape[0], alpha.shape[1], 1))

    return cmp, bg, label, warped_alpha, fg
Esempio n. 2
0
def augmentation(dim_dataset, voc_dataset, sig_dataset):
    """Create synthetic data for video matting by warping composite images
    (DIM matte / VOC background).

    Args:
        dim_dataset: root of the DIM dataset; foregrounds are read from
            ``fg/DIM_TEST`` and ``fg/DIM_TRAIN`` subfolders.
        voc_dataset: directory of VOC images used as random backgrounds.
        sig_dataset: output root; results are written under ``fg/augmented``
            and ``bg/augmented`` (both must already exist).

    For each foreground, writes one BGRA reference image plus ``n``
    augmented (fg, bg, warped-bg) triplets as PNG files.
    """
    n = 50  # number of augmented samples generated per foreground image
    # we both take files from test and train datasets
    filepaths = [[os.path.join(dim_dataset, 'fg', folder, file)
                  for file in os.listdir(os.path.join(dim_dataset, 'fg', folder))]
                 for folder in ['DIM_TEST', 'DIM_TRAIN']]
    paths = filepaths[0] + filepaths[1]
    # we take VOC images as background
    voc_list = [os.path.join(voc_dataset, file) for file in os.listdir(voc_dataset)]
    dst_fg = os.path.join(sig_dataset, 'fg', 'augmented')
    dst_bg = os.path.join(sig_dataset, 'bg', 'augmented')
    for i, path in enumerate(paths):
        alpha, fg = reader.read_fg_img(path)
        name = os.path.basename(path).split('.')[0]
        print('Processing image {} ({}/{})'.format(name, i+1, len(paths)))
        # re-attach alpha as a uint8 4th channel so cv2 writes BGRA PNG
        bgra_ref = np.concatenate((fg, (255. * alpha.reshape((alpha.shape[0], alpha.shape[1], 1))).astype(np.uint8)),
                                  axis=2)
        cv2.imwrite(os.path.join(dst_fg, '{}_fg_ref.png'.format(name)), bgra_ref)
        # BUG FIX: the inner loop previously reused `i`, shadowing the outer
        # enumerate index; renamed to `k` (file numbering is unchanged).
        for k in progressbar.progressbar(range(n)):
            bg_path = voc_list[np.random.randint(len(voc_list))]
            bg = cv2.imread(bg_path)
            bg = cv2.resize(bg, dsize=(fg.shape[1], fg.shape[0]), interpolation=cv2.INTER_LINEAR)
            nfg, nbg, nal = augment(fg, bg, alpha)
            augmented_fg = np.concatenate((nfg, (255. * nal.reshape((nal.shape[0], nal.shape[1], 1))).astype(np.uint8)),
                                          axis=2)
            cv2.imwrite(os.path.join(dst_bg, '{}_bg_ref_{:04d}.png'.format(name, k)), bg)
            cv2.imwrite(os.path.join(dst_bg, '{}_bg_{:04d}.png'.format(name, k)), nbg)
            cv2.imwrite(os.path.join(dst_fg, '{}_fg_{:04d}.png'.format(name, k)), augmented_fg)
Esempio n. 3
0
def load_and_crop(entry, input_size):
    """Load input/label arrays from a training-list entry.

    Args:
        entry: tuple ``(fg_path, tr_path, bg_path)`` — foreground (with
            alpha), trimap, and background image paths.
        input_size: ``(width, height)`` passed to ``cv2.resize`` as ``dsize``.

    Returns:
        ``(inp, label, fg)`` where ``inp`` is the composite and background
        concatenated on the channel axis (both VGG-mean subtracted), ``label``
        is the alpha matte with shape ``(h, w, 1)``, and ``fg`` is the resized
        foreground.
    """
    fg_path, tr_path, bg_path = entry
    alpha, fg = reader.read_fg_img(fg_path)
    # NOTE: np.float was removed in NumPy 1.24; np.float64 is the same dtype.
    fg = fg.astype(dtype=np.float64)  # potentially very big
    bg = cv2.imread(bg_path).astype(dtype=np.float64)
    trimap = cv2.imread(tr_path, 0) / 255.  # grayscale, scaled to [0, 1]
    trimap = trimap.reshape((trimap.shape[0], trimap.shape[1], 1))
    crop_type = [(320, 320), (480, 480),
                 (640, 640)]  # we crop images of different sizes
    crop_h, crop_w = crop_type[np.random.randint(0, len(crop_type))]
    fg_h, fg_w = fg.shape[:2]
    if fg_h < crop_h or fg_w < crop_w:
        # in that case the image is not too big, and we have to add padding
        alpha = alpha.reshape((alpha.shape[0], alpha.shape[1], 1))
        cat = np.concatenate((fg, alpha, trimap), axis=2)
        cropped_cat = get_padded_img(cat, crop_h, crop_w)
        # channels: fg = [:3], alpha = [3:4], trimap = [4:]
        fg, alpha, trimap = np.split(cropped_cat,
                                     indices_or_sections=[3, 4],
                                     axis=2)
    # otherwise, the fg is likely to be HRes, we directly crop it and dismiss
    # the original image to avoid manipulating big images
    fg_h, fg_w = fg.shape[:2]
    # randomly picks top-left corner
    i, j = np.random.randint(0, fg_h - crop_h + 1), np.random.randint(
        0, fg_w - crop_w + 1)
    # BUG FIX: the horizontal slice previously used crop_h instead of crop_w.
    # Harmless while every crop type is square, but wrong for rectangular ones.
    fg = fg[i:i + crop_h, j:j + crop_w]
    alpha = alpha[i:i + crop_h, j:j + crop_w]
    trimap = trimap[i:i + crop_h, j:j + crop_w]

    # NOTE(review): fg has already been cropped to (crop_h, crop_w) here, so
    # this ratio evaluates to bg.shape — presumably the original fg size was
    # intended; kept as-is to preserve behavior. TODO confirm.
    bg_crop_h, bg_crop_w = int(np.ceil(crop_h * bg.shape[0] / fg.shape[0])),\
                           int(np.ceil(crop_w * bg.shape[1] / fg.shape[1]))
    padded_bg = get_padded_img(bg, bg_crop_h, bg_crop_w)
    i, j = np.random.randint(0,
                             bg.shape[0] - bg_crop_h + 1), np.random.randint(
                                 0, bg.shape[1] - bg_crop_w + 1)
    cropped_bg = padded_bg[i:i + bg_crop_h, j:j + bg_crop_w]
    bg = cv2.resize(src=cropped_bg,
                    dsize=input_size,
                    interpolation=cv2.INTER_LINEAR)
    fg = cv2.resize(fg, input_size, interpolation=cv2.INTER_LINEAR)
    alpha = cv2.resize(alpha, input_size, interpolation=cv2.INTER_LINEAR)
    trimap = cv2.resize(trimap, input_size, interpolation=cv2.INTER_LINEAR)

    # composite = fg blended over bg with alpha, then mean-centered for VGG
    cmp = reader.create_composite_image(fg, bg, alpha)
    cmp -= params.VGG_MEAN
    bg -= params.VGG_MEAN
    trimap -= 0.5  # center trimap to [-0.5, 0.5] (currently unused in inp)
    inp = np.concatenate((cmp, bg), axis=2)
    label = alpha.reshape((alpha.shape[0], alpha.shape[1], 1))
    return inp, label, fg
Esempio n. 4
0
    # cv2.createTrackbar('threshold', 'image', 0, 50, nothing)
    # while True:
    #     excl = np.zeros((h, w), dtype=np.float)
    #     thresh = cv2.getTrackbarPos('threshold', 'image')
    #     excl[np.where(err > thresh)] = 1.
    #     cv2.imshow('image', excl)
    #     k = cv2.waitKey(1) & 0xFF
    #     if k == 27:
    #         break
    return alpha


if __name__ == '__main__':
    # Quick visual sanity check: warp a sample alpha matte with backward
    # flow, show it, then show the flow-consistency-corrected version.
    fwd_flow = reader.read_flow('./test_data/forward.flo')
    bwd_flow = reader.read_flow('./test_data/backward.flo')
    alp, img = reader.read_fg_img('./test_data/in0062.png')
    h, w = img.shape[:2]
    warped = warp_img(alp, bwd_flow)
    cv2.imshow('noncorr', warped)
    corrected = correct_alpha(bwd_flow, fwd_flow, warped)
    cv2.imshow('test', corrected)
    cv2.waitKey(0)