Example 1
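The test routine below restores a trained X-Field model, precomputes the decoder flows for every stored coordinate, renders interpolation videos under the "rendered videos" folder and logs them to Weights & Biases. The snippet assumes the module-level imports of the surrounding script (os, numpy as np, tensorflow as tf 1.x, cv2, wandb) and the project helpers load_imgs, Flow and Blending_test.
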
def run_test(args):
    print('---------- Initialize W&B run for experiment tracking----------\n')
    run = wandb.init(entity=args.wandb_entity,
                     project=args.wandb_project,
                     job_type='train')
    wandb.config.update(args)

    print('---------- Perform Testing ----------')

    savedir = args.savepath
    if not os.path.exists(savedir):
        os.mkdir(savedir)
    head_tail = os.path.split(args.dataset)
    savedir = os.path.join(savedir, head_tail[1])

    if not os.path.exists(savedir):
        raise NameError('There is no directory:\n %s' % (savedir))

    if not os.path.exists(os.path.join(savedir, "rendered videos")):
        os.mkdir(os.path.join(savedir, "rendered videos"))
        print('creating directory %s' %
              (os.path.join(savedir, "rendered videos")))

    print('XField type: %s' % (args.type))
    print('Dimension of input xfield: %s' % (args.dim))
    print('output video fps: %d' % (args.fps))
    print('number of intermediate points for interpolation: %d' % (args.scale))

    # load the input images, their X-Field coordinates, the training pairs
    # and the image resolution
    images, coordinates, all_pairs, h_res, w_res = load_imgs(args)
    min_ = np.min(coordinates)
    max_ = np.max(coordinates)

    dims = args.dim
    num_n = args.num_n

    if num_n > np.prod(dims):
        num_n = np.prod(dims)

    input = tf.placeholder(tf.float32, shape=[1, 1, 1, len(dims)])

    num_output = len(args.type) * 2
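    # two flow channels (a 2D warp field) per X-Field dimension, per pixel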
    with tf.variable_scope("gen_flows"):
        flows = Flow(input, h_res, w_res, num_output, args.nfg, min_, max_)

    if args.type == ['light', 'view', 'time']:

        with tf.variable_scope("gen_flows"):
            albedos = tf.Variable(tf.constant(
                1.0, shape=[dims[1] * dims[2], h_res, w_res, 3]),
                                  name='albedo')
            index_albedo = tf.placeholder(tf.int32, shape=(num_n, ))
            albedo = tf.gather(albedos, index_albedo, axis=0)

    elif args.type == ['light']:

        with tf.variable_scope("gen_flows"):
            albedo = tf.Variable(tf.constant(1.0, shape=[1, h_res, w_res, 3]),
                                 name='albedo')

    else:
        albedo = tf.constant(1.0, shape=[1, h_res, w_res, 3])

    # placeholders for the neighbours used at test time: their coordinates,
    # their images and their precomputed flows
    input_N = tf.placeholder(tf.float32, shape=[num_n, 1, 1, len(dims)])
    Neighbors_img = tf.placeholder(tf.float32, shape=[num_n, h_res, w_res, 3])
    Neighbors_flow = tf.placeholder(
        tf.float32, shape=[num_n, h_res, w_res,
                           len(args.type) * 2])

    # soft blending of the warped neighbours into the interpolated output
    interpolated = Blending_test(input, input_N, Neighbors_img, Neighbors_flow,
                                 flows, albedo, h_res, w_res, args)

    saver = tf.train.Saver(max_to_keep=1000)
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    ckpt = tf.train.get_checkpoint_state("%s/trained model/" % (savedir))
    if ckpt:
        print('\n loading pretrained model  ' + ckpt.model_checkpoint_path)
        saver.restore(sess, ckpt.model_checkpoint_path)
    else:
        raise NameError(
            'There is no pretrained model located at dir:\n %s/trained model/'
            % (savedir))

    # run the decoder once per stored coordinate and cache the flows; they are
    # gathered per query as the neighbours' flows during rendering
    precomputed_flows = []

    for i in range(len(coordinates)):
        flows_out = sess.run(flows, feed_dict={input: coordinates[[i], ::]})
        precomputed_flows.append(flows_out[0, ::])

    precomputed_flows = np.stack(precomputed_flows, 0)

    if args.type in (['view'], ['light'], ['time']):

        # theta samples half a circle; the two cosine branches below join into
        # one closed loop through the normalized coordinate domain
        theta = [np.pi / args.scale * i for i in range(args.scale + 1)]

        X1 = 1 - np.cos(theta)
        X2 = 1 + np.cos(theta)
        Y1 = 1 + np.sqrt(1 - (X1 - 1)**2)
        Y2 = 1 - np.sqrt(1 - (X2 - 1)**2)

        X = np.append(X1, X2)
        Y = np.append(Y1, Y2)
        X = X / 2
        Y = Y / 2

        if args.type == ['view'] or args.type == ['light']:

            X = X * (dims[1] - 1)
            Y = Y * (dims[0] - 1)
            rendering_path = np.transpose([X, Y])

        if args.type == ['time']:

            rendering_path = np.transpose([X * (dims[0] - 1)])

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter('%s/rendered videos/rendered.mp4' % (savedir),
                              fourcc, args.fps, (w_res, h_res))
        for id in range(len(rendering_path)):

            input_coord = np.array([[[rendering_path[id, :]]]])
            # the num_n stored coordinates closest to the query (squared L2
            # distance) serve as interpolation neighbours
            indices = np.argsort(
                np.sum(
                    np.square(input_coord[0, 0, 0, :] -
                              coordinates[:, 0, 0, :]), -1))[:num_n]

            input_coord_N = coordinates[indices, ::]
            input_Neighbors = images[indices, ::]
            input_flows = precomputed_flows[indices, ::]

            im_out = sess.run(interpolated,
                              feed_dict={
                                  input: input_coord,
                                  input_N: input_coord_N,
                                  Neighbors_img: input_Neighbors,
                                  Neighbors_flow: input_flows,
                              })
            im_out = np.minimum(np.maximum(im_out[0, ::], 0.0), 1.0)
            out.write(np.uint8(im_out * 255))

            print('\r interpolated image %d of %d' %
                  (id + 1, len(rendering_path)),
                  end=" ")

        out.release()
        wandb.log({
            "rendered":
            wandb.Video('%s/rendered videos/rendered.mp4' % (savedir),
                        fps=4,
                        format="mp4")
        })

    if args.type == ['light', 'view', 'time']:

        print('\n number of neighbors for interpolation: %d' % (num_n))
        max_L = dims[0] - 1
        max_V = dims[1] - 1
        max_T = dims[2] - 1

        X_L = np.linspace(0, max_L, max_L * args.scale)
        X_L = np.append(X_L, np.flip(X_L))
        X_V = np.linspace(0, max_V, max_V * args.scale)
        X_V = np.append(X_V, np.flip(X_V))
        X_T = np.linspace(0, max_T, max_T * args.scale)
        X_T = np.append(X_T, np.flip(X_T))
        middle_X_L = max_L * 0.5 * np.ones_like(X_L)
        middle_X_V = max_V * 0.5 * np.ones_like(X_V)
        middle_X_T = max_T * 0.5 * np.ones_like(X_T)

        all_dimensions = {
            'light': np.stack([X_L, middle_X_V, middle_X_T], 1),
            'view': np.stack([middle_X_L, X_V, middle_X_T], 1),
            'time': np.stack([middle_X_L, middle_X_V, X_T], 1),
            'light_view': np.stack([X_L, X_V, middle_X_T], 1),
            'light_time': np.stack([X_L, middle_X_V, X_T], 1),
            'view_time': np.stack([middle_X_L, X_V, X_T], 1),
            'light_view_time': np.stack([X_L, X_V, X_T], 1)
        }

        for case, rendering_path in all_dimensions.items():

            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            out = cv2.VideoWriter(
                '%s/rendered videos/rendered_%s.mp4' % (savedir, case), fourcc,
                args.fps, (w_res, h_res))

            print('\n --------- %s interpolation ---------' % (case))

            for id in range(len(rendering_path)):

                input_coord = np.array([[[rendering_path[id, :]]]])
                indices = np.argsort(
                    np.sum(
                        np.square(input_coord[0, 0, 0, :] -
                                  coordinates[:, 0, 0, :]), -1))[:num_n]

                input_coord_N = coordinates[indices, ::]
                input_Neighbors = images[indices, ::]
                input_flows = precomputed_flows[indices, ::]

                # map each neighbour's flat index back to a (view, time)
                # position to pick the corresponding learnt albedo
                time_idx = indices // (dims[0] * dims[1])
                rest = indices % (dims[0] * dims[1])
                view_idx = rest % dims[1]
                albedo_index = view_idx * dims[1] + time_idx

                im_out = sess.run(interpolated,
                                  feed_dict={
                                      input: input_coord,
                                      input_N: input_coord_N,
                                      Neighbors_img: input_Neighbors,
                                      Neighbors_flow: input_flows,
                                      index_albedo: albedo_index,
                                  })

                im_out = np.minimum(np.maximum(im_out[0, ::], 0.0), 1.0)
                out.write(np.uint8(im_out * 255))

                print('\r interpolated image %d of %d' %
                      (id + 1, len(rendering_path)),
                      end=" ")

            out.release()
            wandb.log({
                "rendered":
                wandb.Video('%s/rendered videos/rendered_%s.mp4' %
                            (savedir, case),
                            fps=4,
                            format="mp4")
            })
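
Depending on args.type, the routine above produces either a single rendered.mp4 that sweeps a closed path through the coordinate domain (when one X-Field dimension is given) or one rendered_<case>.mp4 per light/view/time combination (when all three dimensions are trained jointly).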
Example 2
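The training routine fits the flow decoder (and, for the light and light-view-time cases, the learnable albedos) by minimizing an L1 reconstruction loss over neighbour pairs, saving the best checkpoint and per-epoch visualizations under the dataset-specific save directory. In addition to the imports listed for Example 1, it relies on time and flow_vis and on the Blending_train helper.
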
def run_training(args):

    print('---------- Perform Training ----------')

    savedir = args.savepath
    if not os.path.exists(savedir):
        os.mkdir(savedir)

    head_tail = os.path.split(args.dataset)
    savedir = os.path.join(savedir, head_tail[1])

    if not os.path.exists(savedir):
        os.mkdir(savedir)

    if not os.path.exists(os.path.join(savedir, "trained model")):
        os.mkdir(os.path.join(savedir, "trained model"))
        print('creating directory %s' %
              (os.path.join(savedir, "trained model")))

    if not os.path.exists(os.path.join(savedir, "saved training")):
        os.mkdir(os.path.join(savedir, "saved training"))
        print('creating directory %s' %
              (os.path.join(savedir, "saved training")))

    print('XField type: %s' % (args.type))
    print('Dimension of input xfield: %s' % (args.dim))

    #loading images
    images, coordinates, all_pairs, h_res, w_res = load_imgs(args)

    dims = args.dim
    num_n = args.num_n  # number of neighbors
    min_ = np.min(coordinates)
    max_ = np.max(coordinates)

    print('\n ------- Creating the model -------')

    # batch size is num_n + 1 (number of neighbors + target)
    inputs = tf.placeholder(tf.float32, shape=[num_n + 1, 1, 1, len(dims)])

    # Jacobian network
    num_output = len(args.type) * 2

    with tf.variable_scope("gen_flows"):
        flows = Flow(inputs, h_res, w_res, num_output, args.nfg, min_, max_)

    nparams_decoder = np.sum([
        np.prod(v.get_shape().as_list()) for v in tf.trainable_variables()
        if v.name.startswith("gen_flows")
    ])
    print('Number of learnable parameters (decoder): %d' % (nparams_decoder))

    # learnt albedo
    # The albedos are initialized with constant 1.0
    if args.type == ['light', 'view', 'time']:

        with tf.variable_scope("gen_flows"):

            # For light-view-time interpolation, we consider num_views*num_times albedos
            albedos = tf.Variable(tf.constant(
                1.0, shape=[dims[1] * dims[2], h_res, w_res, 3]),
                                  name='albedo')
            index_albedo = tf.placeholder(tf.int32, shape=(1, ))
            albedo = tf.gather(albedos, index_albedo, axis=0)

        nparams = np.sum([
            np.prod(v.get_shape().as_list()) for v in tf.trainable_variables()
            if v.name.startswith("gen_flows")
        ])
        print(
            'Number of learnable parameters (%d albedos with res %d x %d ): %d'
            % (dims[1] * dims[2], h_res, w_res, nparams - nparams_decoder))

    elif args.type == ['light']:

        with tf.variable_scope("gen_flows"):
            # For light interpolation, we consider just one albedo
            albedo = tf.Variable(tf.constant(1.0, shape=[1, h_res, w_res, 3]),
                                 name='albedo')

        nparams = np.sum([
            np.prod(v.get_shape().as_list()) for v in tf.trainable_variables()
            if v.name.startswith("gen_flows")
        ])
        print(
            'Number of learnable parameters (%d albedos with res %d x %d ): %d'
            % (1, h_res, w_res, nparams - nparams_decoder))

    else:
        # For view and time interpolation, we do not train for albedo, we consider it as a constant non-learnable parameter
        albedo = tf.constant(1.0, shape=[1, h_res, w_res, 3])

    Neighbors = tf.placeholder(tf.float32, shape=[num_n, h_res, w_res, 3])

    # soft blending
    interpolated = Blending_train(inputs, Neighbors, flows, albedo, h_res,
                                  w_res, args)

    Reference = tf.placeholder(tf.float32, shape=[1, h_res, w_res, 3])

    # L1 loss
    loss = tf.reduce_mean((tf.abs(interpolated - Reference)))

    gen_tvars = [
        var for var in tf.trainable_variables()
        if var.name.startswith("gen_flows")
    ]
    learning_rate = tf.placeholder(tf.float32, shape=())
    gen_optim = tf.train.AdamOptimizer(learning_rate)
    gen_grads = gen_optim.compute_gradients(loss, var_list=gen_tvars)
    gen_train = gen_optim.apply_gradients(gen_grads)

    saver = tf.train.Saver(max_to_keep=1000)
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    if args.load_pretrained:
        ckpt = tf.train.get_checkpoint_state("%s/trained model" % (savedir))
        if ckpt:
            print('\n loading pretrained model  ' + ckpt.model_checkpoint_path)
            saver.restore(sess, ckpt.model_checkpoint_path)

    print('------------ Start Training ------------')

    lr = args.lr
    print('Starting learning rate with %0.4f' % (lr))

    stop_l1_thr = 0.01

    iter_end = 100000  # total number of iterations

    indices = np.array([i for i in range(len(all_pairs))])
    if len(indices) < 500:  # we considered around 500 iterations per epoch
        indices = np.repeat(indices, 500 // len(indices))

    epoch_size = len(indices)
    epoch_end = iter_end // epoch_size  # total number of epochs

    if args.type == ['light', 'view', 'time']:

        st = time.time()
        min_loss = 1000
        l1_loss_t = 1
        epoch = 0

        while l1_loss_t > stop_l1_thr and epoch <= epoch_end:

            l1_loss_t = 0
            np.random.shuffle(indices)

            for id in range(epoch_size):

                pair = all_pairs[indices[id], ::]

                input_coords = coordinates[pair[:num_n + 1], ::]
                reference_img = images[pair[:1], ::]
                Neighbors_img = images[pair[1:num_n + 1], ::]
                _index = [pair[-1]]

                _, l1loss = sess.run(
                    [gen_train, loss],
                    feed_dict={
                        inputs: input_coords,
                        Reference: reference_img,
                        Neighbors: Neighbors_img,
                        learning_rate: lr,
                        index_albedo: _index
                    })
                l1_loss_t = l1_loss_t + l1loss

                print(
                    '\r Epoch %3.0d  Iteration %3.0d of %3.0d   Cumulative L1 loss = %3.3f'
                    % (epoch, id + 1, epoch_size, l1_loss_t),
                    end=" ")

            l1_loss_t = l1_loss_t / epoch_size
            print(" elapsed time %3.1f m  Averaged L1 loss = %3.5f " %
                  ((time.time() - st) / 60, l1_loss_t))

            if l1_loss_t < min_loss:
                saver.save(sess, "%s/trained model/model.ckpt" % (savedir))
                min_loss = l1_loss_t

            # dump the centre image and its reconstructions/flows for visual inspection
            center = np.prod(dims) // 2
            cv2.imwrite("%s/saved training/reference.png" % (savedir),
                        np.uint8(images[center, ::] * 255))

            pair = all_pairs[3 * center + 0, ::]

            out_img, flows_out = sess.run(
                [interpolated, flows],
                feed_dict={
                    inputs: coordinates[pair[:num_n + 1], ::],
                    Neighbors: images[pair[1:num_n + 1], ::],
                    index_albedo: [pair[-1]]
                })

            out_img = np.minimum(np.maximum(out_img, 0.0), 1.0)
            cv2.imwrite("%s/saved training/recons_light.png" % (savedir),
                        np.uint8(out_img[0, ::] * 255))

            flow_color = flow_vis.flow_to_color(flows_out[0, :, :, 0:2],
                                                convert_to_bgr=False)
            cv2.imwrite("%s/saved training/flow_light.png" % (savedir),
                        np.uint8(flow_color))

            flow_color = flow_vis.flow_to_color(flows_out[0, :, :, 2:4],
                                                convert_to_bgr=False)
            cv2.imwrite("%s/saved training/flow_view.png" % (savedir),
                        np.uint8(flow_color))

            flow_color = flow_vis.flow_to_color(flows_out[0, :, :, 4:6],
                                                convert_to_bgr=False)
            cv2.imwrite("%s/saved training/flow_time.png" % (savedir),
                        np.uint8(flow_color))

            pair = all_pairs[3 * center + 1, ::]
            out_img = sess.run(interpolated,
                               feed_dict={
                                   inputs: coordinates[pair[:num_n + 1], ::],
                                   Neighbors: images[pair[1:num_n + 1], ::],
                                   index_albedo: [pair[-1]]
                               })

            out_img = np.minimum(np.maximum(out_img, 0.0), 1.0)
            cv2.imwrite("%s/saved training/recons_view.png" % (savedir),
                        np.uint8(out_img[0, ::] * 255))

            pair = all_pairs[3 * center + 2, ::]
            out_img = sess.run(interpolated,
                               feed_dict={
                                   inputs: coordinates[pair[:num_n + 1], ::],
                                   Neighbors: images[pair[1:num_n + 1], ::],
                                   index_albedo: [pair[-1]]
                               })

            out_img = np.minimum(np.maximum(out_img, 0.0), 1.0)
            cv2.imwrite("%s/saved training/recons_time.png" % (savedir),
                        np.uint8(out_img[0, ::] * 255))
            epoch = epoch + 1

            if epoch == epoch_end // 2:
                lr = 0.00005

    if args.type in (['view'], ['time'], ['light']):

        st = time.time()
        img_mov = cv2.VideoWriter(
            '%s/saved training/epoch_recons.mp4' % (savedir),
            cv2.VideoWriter_fourcc(*'mp4v'), 10, (w_res, h_res))
        flow_mov = cv2.VideoWriter(
            '%s/saved training/epoch_flows.mp4' % (savedir),
            cv2.VideoWriter_fourcc(*'mp4v'), 10, (w_res, h_res))

        min_loss = 1000
        l1_loss_t = 1
        epoch = 0

        while l1_loss_t > stop_l1_thr and epoch <= epoch_end:

            l1_loss_t = 0
            np.random.shuffle(indices)

            for id in range(epoch_size):

                pair = all_pairs[indices[id], ::]
                input_coords = coordinates[pair[:num_n + 1], ::]
                reference_img = images[pair[:1], ::]
                Neighbors_img = images[pair[1:num_n + 1], ::]

                _, l1loss = sess.run(
                    [gen_train, loss],
                    feed_dict={
                        inputs: input_coords,
                        Reference: reference_img,
                        Neighbors: Neighbors_img,
                        learning_rate: lr,
                    })

                l1_loss_t = l1_loss_t + l1loss
                print(
                    '\r Epoch %3.0d  Iteration %3.0d of %3.0d   Cumulative L1 loss = %3.3f'
                    % (epoch, id + 1, epoch_size, l1_loss_t),
                    end=" ")

            l1_loss_t = l1_loss_t / epoch_size
            print(" elapsed time %3.1f m  Averaged L1 loss = %3.5f" %
                  ((time.time() - st) / 60, l1_loss_t))

            if l1_loss_t < min_loss:
                saver.save(sess, "%s/trained model/model.ckpt" % (savedir))
                min_loss = l1_loss_t

            if args.type == ['light']:

                albedo_out = np.minimum(np.maximum(sess.run(albedo), 0.0), 1.0)
                cv2.imwrite("%s/saved training/albedo.png" % (savedir),
                            np.uint8(albedo_out[0, :, :, :] * 255))

            center = np.prod(dims) // 2
            cv2.imwrite("%s/saved training/reference.png" % (savedir),
                        np.uint8(images[center, ::] * 255))

            pair = all_pairs[(len(all_pairs) // len(images)) * center, ::]

            out_img, flows_out = sess.run(
                [interpolated, flows],
                feed_dict={
                    inputs: coordinates[pair[:num_n + 1], ::],
                    Neighbors: images[pair[1:num_n + 1], ::]
                })

            out_img = np.minimum(np.maximum(out_img, 0.0), 1.0)
            cv2.imwrite("%s/saved training/recons.png" % (savedir),
                        np.uint8(out_img[0, ::] * 255))

            flow_color = flow_vis.flow_to_color(flows_out[0, :, :, 0:2],
                                                convert_to_bgr=False)
            cv2.imwrite("%s/saved training/flow.png" % (savedir),
                        np.uint8(flow_color))
            img_mov.write(np.uint8(out_img[0, ::] * 255))
            flow_mov.write(np.uint8(flow_color))
            epoch = epoch + 1

            if epoch == epoch_end // 2:
                lr = 0.00005

        img_mov.release()
        flow_mov.release()
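
For orientation, here is a minimal, hypothetical sketch of how the two routines above might be driven. The attribute names mirror exactly what run_training and run_test read from args; the concrete values, the dataset path and the W&B names are placeholders, and the original scripts may construct args differently (e.g. via argparse command-line flags).

import argparse

# Hypothetical configuration: every field mirrors an attribute read by
# run_training / run_test above; the values are only placeholders.
args = argparse.Namespace(
    dataset='data/my_xfield_scene',   # folder containing the captured images (hypothetical path)
    savepath='results',               # root folder for checkpoints and renderings
    type=['light', 'view', 'time'],   # which X-Field dimensions to interpolate
    dim=[3, 3, 3],                    # number of captured samples per dimension
    num_n=2,                          # neighbours used for blending (must match the pairs built by load_imgs)
    nfg=4,                            # capacity parameter passed to the Flow decoder
    lr=0.0001,                        # initial learning rate
    load_pretrained=False,            # resume from an existing checkpoint if True
    scale=16,                         # intermediate points per interpolation step
    fps=30,                           # frame rate of the rendered videos
    wandb_entity='my-entity',         # hypothetical W&B entity
    wandb_project='xfields-demo')     # hypothetical W&B project

run_training(args)
# run_test(args)   # typically invoked separately, once a checkpoint has been written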