Example #1
0
 def __call__(self, inputs):
     """Warp the image by the given optical flow.

     `inputs` is an (image, flow) pair; returns the image warped by
     `flow` via `tf_warp` under `self.data_format`.
     """
     image, flow = inputs
     return tf_warp(image, flow, self.data_format)
Example #2
0
def main(args: Settings):
    """Visualize frame-interpolation predictions and intermediate flow.

    Loads a trained interpolator, extracts a flow-only sub-model, then
    iterates over a triplet dataset showing the input frames, ground
    truth, prediction, flow visualization and warped frames, until the
    user presses ESC or 'q'.
    """
    data_format = args.data_format
    tf.keras.backend.set_image_data_format(data_format)

    # Define inference-only model.
    # NOTE: with output_multiscale=False the model emits a single
    # full-resolution tensor, so multi-output unpacking is disabled.
    model = build_interpolator(
        input_shape=args.input_shape,
        output_multiscale=False)
    load_weights(model, args.model)
    multi_output = False

    logging.info('Done with model load')

    # Extract flow-only model for visualization.
    # NOTE(ycho): We're only extracting forward-directional flow,
    # i.e. flow : prv[i, j] == nxt[i+flo[i,j,1], j+flo[i,j,0]]
    flow_model = tf.keras.Model(
        inputs=model.inputs,
        outputs=model.get_layer('lambda_11').get_output_at(0)
    )

    # FIXME(ycho): Ability to select dataset
    # Select dataset.
    if args.dataset == 'ytvos':
        dataset = YoutubeVos(YoutubeVosSettings(data_type='train'))
    elif args.dataset == 'vimeo':
        dataset = VimeoTriplet(VimeoTripletSettings(data_type='train'))
    else:
        raise ValueError('Invalid dataset = {}'.format(args.dataset))
    # TripletDataset -> tf.data.Dataset
    dataset = read_triplet_dataset(dataset, dsize=args.input_shape,
                                   augment=False,
                                   batch_size=1)

    for img0, img1, img2 in dataset:
        # Model input: the two endpoint frames stacked along channels.
        img_pair = tf.concat([img0, img2], axis=3)

        # @see pre_train:preprocess()
        if data_format == 'channels_first':
            img_pair = einops.rearrange(img_pair, 'n h w c -> n c h w')
        img_pair -= 0.5

        # --- Flow visualization + warp sanity check ---
        flow = flow_model(img_pair)
        flow_rgb = flow_to_image(flow, data_format=data_format)
        _show('5-flow', flow_rgb[0], data_format)

        # warp 1 -> 0, let's see how it fares.
        # The extracted flow is half resolution; upsample 2x
        # (nearest-neighbor via einops.repeat) and rescale magnitudes.
        if data_format == 'channels_first':
            upflow = 2.0 * einops.repeat(flow,
                                         'n c h w -> n c (h h2) (w w2)',
                                         h2=2, w2=2)
        else:
            upflow = 2.0 * einops.repeat(flow,
                                         'n h w c -> n (h h2) (w w2) c',
                                         h2=2, w2=2)
        if data_format == 'channels_first':
            img1_ = einops.rearrange(img1, 'n h w c -> n c h w')
        else:
            img1_ = img1
        img1w = tf_warp(img1_, upflow, data_format)
        _show('6-warp(==0-prv)', img1w[0], data_format)

        # --- Interpolation prediction ---
        pred_img1 = model(img_pair)

        # Take the last (full-res) image in case of multi output.
        # This would be the case if e.g. model.output_multiscale==True.
        if multi_output:
            pred_img1 = pred_img1[-1]

        overlay = 0.5 * img0[0] + 0.5 * img2[0]
        _show('0-prv', img0[0], 'channels_last')
        _show('1-nxt', img2[0], 'channels_last')
        _show('2-ground-truth', img1[0], 'channels_last')
        _show('3-pred', 0.5 + pred_img1[0], data_format=data_format)
        _show('4-overlay', overlay, 'channels_last')

        k = cv2.waitKey(0)
        if k in [27, ord('q')]:
            break
Example #3
0
def estimate_occlusion_map(flow: tf.Tensor, data_format: str = None):
    """
    Estimate occlusion map from optical flow.
    Specifically, determine which pixels in the `next` frame cannot
    be determined based on the flow in the previous frame.
    flow specification:
    prv[i,j] = nxt[i+f[i,j,1], j+f[i,j,0]]

    TODO(ycho): non-stupid flow definition (major-minor)

    flow: (NHWC / NCHW tensor)

    Returns:
        float32 map, 1.0 where a pixel is occluded (forward-mapped out of
        bounds, or never reached by the inverse flow), 0.0 otherwise.
    """
    if data_format is None:
        data_format = tf.keras.backend.image_data_format()

    # Axis holding the 2-channel (dx, dy) flow components.
    if data_format == 'channels_first':
        axis = -3
    else:
        axis = -1

    shape = get_spatial_shape(flow, data_format)
    h, w = shape['h'], shape['w']
    i, j = tf.meshgrid(tf.range(h), tf.range(w), indexing='ij')
    i = tf.cast(i, tf.float32)
    j = tf.cast(j, tf.float32)

    # Forward-mapped coordinates: flow stores (dj, di) in that order
    # (see the flow specification in the docstring).
    dj, di = tf.unstack(flow, axis=axis)
    i2, j2 = i + di, j + dj

    # Mark pixels whose forward mapping lands outside the image.
    oob = tf.reduce_any([i2 < 0, i2 >= h, j2 < 0, j2 >= w], axis=0)
    oob = tf.cast(oob, tf.float32)

    # Batch indices, prepended to spatial indices for scatter below.
    b = einops.repeat(tf.range(shape['n']), 'n -> n h w c', h=h, w=w, c=1)

    # NOTE(ycho): naive inverse flow.
    # works-ish. The assumption here : larger flow = closer flow
    inv_flow = -tf_warp(flow, flow, data_format)
    dj, di = tf.unstack(inv_flow, axis=axis)
    i2, j2 = i + di, j + dj
    idx3 = tf.cast(tf.stack([i2, j2], axis=-1), tf.int32)
    # Clip indices into bounds so the scatter below stays valid.
    idx3 = tf.clip_by_value(idx3, [0, 0], [h - 1, w - 1])
    idx3_wb = tf.concat([b, idx3], axis=-1)
    updates = tf.zeros_like(i2, dtype=tf.float32)
    # map3 = 0 where some inverse-flow sample landed, 1 where no value
    # was ever written (i.e. the pixel is unreachable -> occluded).
    map3 = tf.tensor_scatter_nd_min(tf.ones_like(oob), idx3_wb, updates)

    # Occluded if out-of-bounds OR never reached.
    oob = tf.maximum(oob, map3)
    return oob
Example #4
0
    # TODO(ycho): Cleanup dataset loading pattern for opt-flow datasets.
    glob_pattern = '/media/ssd/datasets/sintel-processed/shards/sintel-*.tfrecord'
    filenames = tf.data.Dataset.list_files(glob_pattern).shuffle(32)
    # dataset = get_reader(filenames).shuffle(buffer_size=1024).repeat().batch(8)
    # dataset = get_reader(filenames).batch(8).repeat()
    # Single-sample batches for interactive visualization.
    dataset = get_reader(filenames).shuffle(
        buffer_size=32).map(preprocess).batch(1)

    for ims, flo in dataset:
        # Unstack `ims`.
        # `ims` packs the (prv, nxt) frame pair along the channel axis.
        if data_format == 'channels_first':
            prv, nxt = einops.rearrange(
                ims, 'n (k c) h w -> k n c h w', k=2)
        else:
            prv, nxt = einops.rearrange(
                ims, 'n h w (k c) -> k n h w c', k=2)

        # Occlusion map (float mask), then warp `nxt` back by the flow
        # and zero out the occluded pixels.
        occ = estimate_occlusion_map(flo, data_format)
        occ_f = tf.cast(occ, tf.float32)[..., None]
        nxt_w = tf_warp(nxt, flo, data_format)
        nxt_w_o = nxt_w * (1.0 - occ_f)

        # NOTE(review): the +0.5 offset presumably undoes a mean-shift
        # done in preprocess() — confirm against its definition.
        show('occ', occ_f[0], True)
        show('prv', 0.5 + prv[0], True)
        show('nxt', 0.5 + nxt[0], True)
        show('nxt_w', 0.5 + nxt_w[0], True)
        show('nxt_w_o', ((0.5 + nxt_w_o) * (1.0 - occ_f))[0], True)
        # ESC or 'q' quits the loop.
        k = cv2.waitKey(0)
        if k in [27, ord('q')]:
            break
Example #5
0
def main(cfg: Settings):
    """Visualize optical-flow predictions against Sintel ground truth.

    Builds the inference-only flow network, restores weights, then
    streams (image-pair, flow) samples from tfrecord shards and displays
    predicted flow, ground-truth flow, and warped/overlay diagnostics.
    """
    if cfg.data_format is not None:
        tf.keras.backend.set_image_data_format(cfg.data_format)
    data_format = tf.keras.backend.image_data_format()

    # 1) Build inference-only network
    model = build_flower(False,
                         cfg.input_shape,
                         data_format)

    # 2) Restore model.
    load_weights(model, cfg.model_file)

    if False:
        # from image

        # x = np.random.uniform(0, 255, size=(1, 256, 512, 6)).astype(np.uint8)
        lhs = cv2.imread(
            '/media/ssd/datasets/MPI-Sintel-complete/test/final/ambush_3/frame_0014.png')
        rhs = cv2.imread(
            '/media/ssd/datasets/MPI-Sintel-complete/test/final/ambush_3/frame_0015.png')
        lhs = cv2.resize(lhs, (512, 256))
        rhs = cv2.resize(rhs, (512, 256))
        x = np.concatenate([lhs, rhs], axis=-1)[None, ...]
        # FIXME(yycho0108): the series of above operations replicate
        # preprocess() data whitening procedure.
        y = model(x / 255.0 - 0.5).numpy()
        rhs_w = tf_warp(rhs[None, ...].astype(np.float32) / 255.0,
                        y)[0].numpy()

        cv2.imshow('lhs', lhs)
        cv2.imshow('rhs', rhs)
        cv2.imshow('overlay', rhs // 2 + lhs // 2)
        cv2.imshow('overlay-w', rhs_w / 2 + lhs / 255.0 / 2)
        cv2.imshow('flow-x', normalize(y[0, ..., 0]))
        cv2.imshow('flow-y', normalize(y[0, ..., 1]))
        cv2.imshow('rhs-w', rhs_w)
        cv2.waitKey(0)

    if True:
        # from tfrecord
        glob_pattern = '/media/ssd/datasets/sintel-processed/shards/sintel-*.tfrecord'
        filenames = tf.data.Dataset.list_files(glob_pattern).shuffle(32)
        # Single-sample batches for interactive visualization.
        dataset = get_reader(filenames).shuffle(
            buffer_size=32).map(preprocess).batch(1)

        for ims, flo in dataset:
            flo_pred = model.predict(ims)

            # Unstack `ims`: the (prv, nxt) pair is packed along channels.
            if data_format == 'channels_first':
                prv, nxt = einops.rearrange(
                    ims, 'n (k c) h w -> k n c h w', k=2)
            else:
                # FIX: previously rearranged to 'k n c h w' here, which
                # transposed channels-last data into channels-first layout
                # (inconsistent with every downstream channels-last op).
                prv, nxt = einops.rearrange(
                    ims, 'n h w (k c) -> k n h w c', k=2)

            # NOTE(ycho): Maintain consistent `data_format` for sanity
            # preserving. Slightly inefficient but oh well...
            nxt_w = tf_warp(nxt, flo_pred, data_format)
            nxt_w_gt = tf_warp(nxt, flo, data_format)

            # Undo `preprocess()` mean-shift for display.
            prv = 0.5 + prv
            nxt = 0.5 + nxt
            nxt_w = 0.5 + nxt_w
            nxt_w_gt = 0.5 + nxt_w_gt

            # Apply colorization.
            flo_rgb = flow_to_image(flo, data_format)
            flo_pred_rgb = flow_to_image(flo_pred, data_format)

            # Compute derived visualizations.
            overlay = 0.5 * prv + 0.5 * nxt
            overlay_warped = 0.5 * prv + 0.5 * nxt_w
            delta_warped = tf.abs(0.5 * prv - 0.5 * nxt_w)
            overlay_warped_gt = 0.5 * prv + 0.5 * nxt_w_gt
            delta_warped_gt = tf.abs(0.5 * prv - 0.5 * nxt_w_gt)

            # Show all.
            for name in ['prv', 'nxt', 'nxt_w', 'overlay',
                         'overlay_warped', 'overlay_warped_gt',
                         'delta_warped', 'delta_warped_gt',
                         'flo_rgb', 'flo_pred_rgb']:
                image = locals()[name]
                # NOTE(ycho): unbatch before showing.
                show(name, image[0], True, data_format)

            k = cv2.waitKey(0)
            if k == 27:
                break
Example #6
0
if True:
    # Force channels-last layout and CPU execution for this script.
    tf.keras.backend.set_image_data_format('channels_last')
    data_format = tf.keras.backend.image_data_format()
    disable_gpu()

    # TODO(ycho): Cleanup dataset loading pattern for opt-flow datasets.
    glob_pattern = '/media/ssd/datasets/sintel-processed/shards/sintel-*.tfrecord'
    filenames = tf.data.Dataset.list_files(glob_pattern).shuffle(32)
    # dataset = get_reader(filenames).shuffle(buffer_size=1024).repeat().batch(8)
    # dataset = get_reader(filenames).batch(8).repeat()
    # Single-sample batches for interactive visualization.
    dataset = get_reader(filenames).shuffle(
        buffer_size=32).map(preprocess).batch(1)

    for ims, flo in dataset:
        # NOTE(review): inverse flow approximated by warping the flow by
        # itself and negating — presumably assumes locally smooth flow;
        # confirm against tf_warp's semantics.
        inv_flo = -tf_warp(flo, flo, data_format)

        # Unstack `ims`.
        # `ims` packs the (prv, nxt) frame pair along the channel axis.
        if data_format == 'channels_first':
            prv, nxt = einops.rearrange(ims, 'n (k c) h w -> k n c h w', k=2)
        else:
            prv, nxt = einops.rearrange(ims, 'n h w (k c) -> k n h w c', k=2)

        # Warp in both directions; nxt_ww round-trips nxt -> warped -> back.
        # NOTE(review): prv_w is computed but never shown below.
        nxt_w = tf_warp(nxt, flo, data_format)
        prv_w = tf_warp(prv, inv_flo, data_format)
        nxt_ww = tf_warp(nxt_w, inv_flo, data_format)

        # The +0.5 offset undoes the preprocess() mean-shift for display.
        show('prv', 0.5 + prv[0], True)
        show('nxt', 0.5 + nxt[0], True)
        show('nxt_w', 0.5 + nxt_w[0], True)
        show('nxt_ww', 0.5 + nxt_ww[0], True)