# Example 1
def setup_input(batch_size, data_format):
    """Build the training input pipeline over the FlyingChairs3D dataset.

    Args:
        batch_size: Samples per batch; incomplete final batches are dropped.
        data_format: Image data format string, forwarded to `preprocess`.

    Returns:
        A batched, prefetched `tf.data` dataset of (images, flow) pairs.
    """

    # FIXME(yycho0108): 0.56 value here is an inevitable result of:
    # - the desire to downsample the image resolution
    # - the requirement that the size of the output image be 256x512.
    def _preprocess_fc3d(ims, flo):
        return preprocess(ims, flo, data_format, 0.56)

    dataset = get_dataset_from_set()
    dataset = dataset.map(_preprocess_fc3d)
    dataset = dataset.batch(batch_size, drop_remainder=True)
    dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
    return dataset
# Example 2
def main():
    """Visually inspect (image pair, flow) samples with OpenCV.

    Iterates the dataset, warps the next frame back toward the previous
    frame using the flow field, and shows the frames plus several flow
    visualizations. Press ESC to quit; any other key advances.
    """
    disable_gpu()

    # compute_stats()

    # NOTE(review): dead branch kept as a quick switch back to the
    # preprocessed Sintel tfrecord during debugging.
    if False:
        filename = '/media/ssd/datasets/sintel-processed/sintel.tfrecord'
        reader = get_reader(filename).map(preprocess)
    else:
        reader = get_dataset_from_set().map(preprocess_fc3d)

    # BUGFIX: Dataset.shuffle() returns a new dataset instead of mutating
    # in place, so the previous bare `reader.shuffle(...)` call was a no-op.
    reader = reader.shuffle(buffer_size=32)
    for entry in reader.as_numpy_iterator():
        ims, flo = entry
        flo_vis = flow_to_image(flo)
        prv = ims[..., :3]  # previous frame: first 3 channels
        nxt = ims[..., 3:]  # next frame: last 3 channels

        # Warp `nxt` back toward `prv` with the negated flow.
        # flo order : (x,y) == (1,0), hence the [..., ::-1] axis swap.
        nxt_w = tfa.image.dense_image_warp(nxt[None, ...],
                                           -flo[None, ..., ::-1])[0].numpy()
        print(nxt_w.shape)

        cv2.imshow('prv', prv)
        cv2.imshow('nxt', nxt)
        cv2.imshow('nxt_w', nxt_w)
        cv2.imshow('nxt_w2', nxt_w - prv)

        # bgr, prv=b, nxt=g, r=warp
        overlay = np.stack([(prv).mean(axis=-1), (nxt).mean(axis=-1),
                            (nxt_w).mean(axis=-1)],
                           axis=-1)
        cv2.imshow('overlay', overlay)
        cv2.imshow('flo', normalize(flo[..., 0]))
        cv2.imshow('flo-vis', flo_vis.numpy())
        k = cv2.waitKey(0)
        if k == 27:  # ESC
            break
# Example 3
    def _get_test_data(self):
        """Fetch one cached validation batch plus its precomputed flow image.

        Returns:
            ((images, flow), flow_image) where flow_image is in NHWC
            layout regardless of the active data format.
        """
        data_format = tf.keras.backend.image_data_format()
        pipeline = (get_dataset_from_set()
                    .map(preprocess_no_op)
                    .batch(self.batch_size)
                    .take(1)
                    .cache())
        val_data = next(pipeline.as_numpy_iterator())

        # Might as well also precompute the ground-truth flow image.
        _, val_flo = val_data
        val_flow_img = flow_to_image(val_flo, data_format=data_format)
        if data_format == 'channels_first':
            # nchw -> nhwc for display.
            val_flow_img = tf.transpose(val_flow_img, (0, 2, 3, 1))
        return val_data, val_flow_img
# Example 4
def compute_stats(size=1024):
    """Print the mean flow magnitude over up to `size` dataset samples.

    Args:
        size: Maximum number of samples to average over.
    """
    reader = (get_dataset_from_set().map(
        preprocess_fc3d,
        num_parallel_calls=tf.data.experimental.AUTOTUNE).prefetch(
            buffer_size=tf.data.experimental.AUTOTUNE))
    count = 0
    means = 0.0
    # Images are unused here; only the flow magnitude statistic matters.
    for _, flo in tqdm(reader.as_numpy_iterator(), total=size):
        means += np.linalg.norm(flo, axis=-1).mean()
        count += 1
        if count >= size:
            break
    # Guard against an empty dataset (previously a ZeroDivisionError).
    if count == 0:
        print('mean flow : n/a (no samples)')
        return
    print('mean flow : {}'.format(means / count))
# Example 5
def train_custom(model, losses, dataset, path, config):
    """
    Custom training loop.

    Trains `model` on `dataset` with per-scale flow losses, periodically
    logging scalar metrics and flow visualizations to TensorBoard and
    checkpointing once per epoch.

    Args:
        model: Multi-output Keras model; one output per flow scale.
        losses: Loss terms consumed by `train_step`.
        dataset: Iterable of (images, flow) training batches.
        path: Mapping with 'log', 'ckpt' and 'run' path entries.
        config: Tuple of (batch_size, num_epoch, update_freq, data_format,
            allow_memory_growth, use_custom_training).
    """

    # Unroll config.
    (batch_size, num_epoch, update_freq, data_format, allow_memory_growth,
     use_custom_training) = config

    # Setup metrics: one overall loss plus one mean per output scale,
    # keyed by the output's spatial height.
    metrics = {}
    metrics['loss'] = tf.keras.metrics.Mean(name='loss', dtype=tf.float32)
    # metrics['epe'] = tf.keras.metrics.Mean(name='epe', dtype=tf.float32)
    for out in model.outputs:
        if data_format == 'channels_first':
            h = out.shape[2]
        else:
            h = out.shape[1]
        name = 'flow-loss-{:02d}'.format(h)
        metrics[name] = tf.keras.metrics.Mean(name=name, dtype=tf.float32)

    # Retrieve validation dataset (only used for visualization for now) ...
    val_data = next(get_dataset_from_set().map(preprocess_no_op).batch(
        batch_size).take(1).cache().as_numpy_iterator())

    # Setup handlers for training/logging.
    # lr = learning_rate_cyclic(batch_size)
    lr = 1e-4  # learning_rate_cyclic(batch_size)
    optim = tf.keras.optimizers.Adam(learning_rate=lr)
    writer = tf.summary.create_file_writer(str(path['log']))
    ckpt = tf.train.Checkpoint(optimizer=optim, model=model)
    ckpt_mgr = tf.train.CheckpointManager(ckpt,
                                          str(path['ckpt']),
                                          max_to_keep=8)

    # Load from checkpoint.
    # NOTE(review): hard-coded restore path from a previous run; restores
    # nothing (no error) if the path has no checkpoint — confirm intended.
    ckpt.restore(tf.train.latest_checkpoint('/tmp/pwc/run/044/ckpt/'))

    # Iterate through train loop.
    for epoch in range(num_epoch):
        print('Epoch {:03d}/{:03d}'.format(epoch, num_epoch))
        # prepare epoch: reset all running means.
        for v in metrics.values():
            v.reset_states()

        # train epoch.
        for ims, flo in dataset:
            # Skip invalid inputs (unlikely but happens sometimes)
            if not (tf.reduce_all(tf.math.is_finite(ims))
                    and tf.reduce_all(tf.math.is_finite(flo))):
                continue

            opt_iter, flow_loss, step_loss = train_step(
                model, losses, optim, ims, flo)

            # update metrics.
            metrics['loss'].update_state(step_loss)
            for out, l in zip(model.outputs, flow_loss):
                if data_format == 'channels_first':
                    h = out.shape[2]
                else:
                    h = out.shape[1]
                name = 'flow-loss-{:02d}'.format(h)
                metrics[name].update_state(l)

            # log/save every `update_freq` optimizer steps.
            if (opt_iter > 0) and ((opt_iter % update_freq) == 0):
                # compute flows and output image.
                val_ims, val_flo = val_data

                # First add ground truth flow ...
                val_flow_img = flow_to_image(val_flo, data_format=data_format)
                if data_format == 'channels_first':
                    # nchw -> nhwc
                    val_flow_img = tf.transpose(val_flow_img, (0, 2, 3, 1))
                flow_imgs = [val_flow_img]

                # ... then the model's prediction at each output scale.
                flows = model(val_ims, training=False)
                for flow in flows:
                    flow_img = flow_to_image(flow, data_format=data_format)
                    if data_format == 'channels_first':
                        # nchw -> nhwc
                        flow_img = tf.transpose(flow_img, (0, 2, 3, 1))

                    # NOTE(yycho0108):
                    # interpolate nearest (tensorboard visualization applies
                    # bilinear interpolation by default).
                    flow_img = tf.image.resize(
                        flow_img,
                        size=val_flow_img.shape[1:3],
                        method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
                    flow_imgs.append(flow_img)

                with writer.as_default():
                    tf.summary.scalar('iter', opt_iter, step=opt_iter)
                    tf.summary.scalar('learning_rate', lr, step=opt_iter)
                    # tf.summary.scalar('learning_rate', lr(
                    #    tf.cast(opt_iter, tf.float32)), step=opt_iter)
                    for k, v in metrics.items():
                        tf.summary.scalar(k, v.result(), step=opt_iter)
                    # will this work?
                    for i, flow_img in enumerate(flow_imgs):
                        name = 'flow-{:02d}'.format(i)
                        tf.summary.image(name,
                                         flow_img,
                                         step=opt_iter,
                                         max_outputs=3)
        # Checkpoint once per epoch, numbered by epoch index.
        ckpt_mgr.save(epoch)
    model.save_weights(str(path['run'] / 'model.h5'))