def run_test(args):
    """Render interpolation videos from a trained model and log them to W&B.

    The function rebuilds the flow network and the test-time blending graph,
    restores the checkpoint stored under ``<savepath>/<dataset name>/trained
    model/``, evaluates the flow network once for every captured coordinate,
    and then walks a rendering path through coordinate space. For each point
    on the path the ``num_n`` closest captured coordinates are blended into
    one frame. Frames are written to
    ``<savepath>/<dataset name>/rendered videos/`` and every finished video
    is logged to W&B under the key ``"rendered"``.

    Args:
        args: Parsed options. The attributes read here are ``wandb_entity``,
            ``wandb_project``, ``savepath``, ``dataset``, ``type``, ``dim``,
            ``fps``, ``scale``, ``num_n`` and ``nfg``. The whole object is
            also passed to ``load_imgs``, ``Blending_test`` and
            ``wandb.config.update``.

    Raises:
        NameError: If ``<savepath>/<dataset name>`` does not exist, or if no
            checkpoint is found in its ``trained model`` sub-directory.
    """
    print('---------- Initialize W&B run for experiment tracking----------\n')
    # NOTE(review): job_type is 'train' although this is the test entry
    # point -- confirm that this grouping is intended.
    wandb.init(entity=args.wandb_entity,
               project=args.wandb_project,
               job_type='train')
    wandb.config.update(args)

    print('---------- Perform Testing ----------')

    os.makedirs(args.savepath, exist_ok=True)
    savedir = os.path.join(args.savepath, os.path.split(args.dataset)[1])
    if not os.path.exists(savedir):
        raise NameError('There is no directory:\n %s' % (savedir))

    video_dir = os.path.join(savedir, "rendered videos")
    if not os.path.exists(video_dir):
        os.mkdir(video_dir)
        print('creating directory %s' % (video_dir))

    print('XField type: %s' % (args.type))
    print('Dimension of input xfield: %s' % (args.dim))
    print('output video fps: %d' % (args.fps))
    print('number of intermediate points for interpolation: %d' % (args.scale))

    images, coordinates, all_pairs, h_res, w_res = load_imgs(args)
    min_ = np.min(coordinates)
    max_ = np.max(coordinates)

    dims = args.dim
    num_n = args.num_n
    # Never ask for more neighbours than there are captured images.
    if num_n > np.prod(dims):
        num_n = np.prod(dims)

    # ------------------------------------------------------------------
    # Graph construction
    # ------------------------------------------------------------------
    coord_ph = tf.placeholder(tf.float32, shape=[1, 1, 1, len(dims)])
    num_output = len(args.type) * 2

    with tf.variable_scope("gen_flows"):
        flows = Flow(coord_ph, h_res, w_res, num_output, args.nfg, min_, max_)

    # Only the light-view-time model selects albedos through a placeholder.
    index_albedo = None
    if args.type == ['light', 'view', 'time']:
        with tf.variable_scope("gen_flows"):
            albedos = tf.Variable(
                tf.constant(1.0, shape=[dims[1] * dims[2], h_res, w_res, 3]),
                name='albedo')
            index_albedo = tf.placeholder(tf.int32, shape=(num_n, ))
            albedo = tf.gather(albedos, index_albedo, 0)
    elif args.type == ['light']:
        with tf.variable_scope("gen_flows"):
            albedo = tf.Variable(
                tf.constant(1.0, shape=[1, h_res, w_res, 3]), name='albedo')
    else:
        albedo = tf.constant(1.0, shape=[1, h_res, w_res, 3])

    input_N = tf.placeholder(tf.float32, shape=[num_n, 1, 1, len(dims)])
    Neighbors_img = tf.placeholder(tf.float32,
                                   shape=[num_n, h_res, w_res, 3])
    Neighbors_flow = tf.placeholder(
        tf.float32, shape=[num_n, h_res, w_res, len(args.type) * 2])

    interpolated = Blending_test(coord_ph, input_N, Neighbors_img,
                                 Neighbors_flow, flows, albedo, h_res, w_res,
                                 args)

    saver = tf.train.Saver(max_to_keep=1000)
    sess = tf.Session()

    def render_frame(target):
        """Blend the num_n captures nearest to `target` into one uint8 frame."""
        query = np.array([[[target]]])
        sq_dist = np.sum(
            np.square(query[0, 0, 0, :] - coordinates[:, 0, 0, :]), -1)
        nearest = np.argsort(sq_dist)[:num_n]

        feed = {
            coord_ph: query,
            input_N: coordinates[nearest, ::],
            Neighbors_img: images[nearest, ::],
            Neighbors_flow: precomputed_flows[nearest, ::],
        }
        if index_albedo is not None:
            time_idx = nearest // (dims[0] * dims[1])
            view_idx = (nearest % (dims[0] * dims[1])) % dims[1]
            # NOTE(review): the stride used here is dims[1] while the albedo
            # table has dims[1] * dims[2] rows -- confirm this matches the
            # albedo index that load_imgs stores for training.
            feed[index_albedo] = view_idx * dims[1] + time_idx

        frame = sess.run(interpolated, feed_dict=feed)
        frame = np.minimum(np.maximum(frame[0, ::], 0.0), 1.0)
        return np.uint8(frame * 255)

    def write_video(video_path, rendering_path):
        """Render every point of `rendering_path`, save the video, log it."""
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(video_path, fourcc, args.fps, (w_res, h_res))
        try:
            for frame_no, target in enumerate(rendering_path, start=1):
                out.write(render_frame(target))
                print('\r interpolated image %d of %d' %
                      (frame_no, len(rendering_path)),
                      end=" ")
        finally:
            # Release even when a frame fails so the file handle is not leaked.
            out.release()
        wandb.log(
            {"rendered": wandb.Video(video_path, fps=4, format="mp4")})

    try:
        sess.run(tf.global_variables_initializer())

        ckpt = tf.train.get_checkpoint_state("%s/trained model/" % (savedir))
        if ckpt:
            print('\n loading pretrained model ' + ckpt.model_checkpoint_path)
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            raise NameError(
                'There is no pretrained model located at dir:\n %s/trained model/'
                % (savedir))

        # The flow network is evaluated once per captured coordinate and the
        # results are reused for every rendered frame.
        precomputed_flows = np.stack([
            sess.run(flows, feed_dict={coord_ph: coordinates[[i], ::]})[0, ::]
            for i in range(len(coordinates))
        ], 0)

        if args.type in (['view'], ['light'], ['time']):
            # Two half circles of diameter 2, concatenated and then scaled
            # to the unit square.
            theta = [np.pi / args.scale * i for i in range(args.scale + 1)]
            X1 = 1 - np.cos(theta)
            X2 = 1 + np.cos(theta)
            Y1 = 1 + np.sqrt(1 - (X1 - 1)**2)
            Y2 = 1 - np.sqrt(1 - (X2 - 1)**2)
            X = np.append(X1, X2) / 2
            Y = np.append(Y1, Y2) / 2

            if args.type == ['time']:
                # One-dimensional field: only the X sweep is used.
                rendering_path = np.transpose([X * (dims[0] - 1)])
            else:
                rendering_path = np.transpose(
                    [X * (dims[1] - 1), Y * (dims[0] - 1)])

            write_video('%s/rendered videos/rendered.mp4' % (savedir),
                        rendering_path)

        if args.type == ['light', 'view', 'time']:
            print('\n number of neighbors for interpolation: %d' % (num_n))

            def there_and_back(extent):
                """Linear sweep 0 -> extent followed by the reverse sweep."""
                forward = np.linspace(0, extent, extent * args.scale)
                return np.append(forward, np.flip(forward))

            max_L = dims[0] - 1
            max_V = dims[1] - 1
            max_T = dims[2] - 1

            X_L = there_and_back(max_L)
            X_V = there_and_back(max_V)
            X_T = there_and_back(max_T)

            # Constant mid-range value for each axis that is not swept.
            middle_X_L = max_L * 0.5 * np.ones_like(X_L)
            middle_X_V = max_V * 0.5 * np.ones_like(X_V)
            middle_X_T = max_T * 0.5 * np.ones_like(X_T)

            all_dimensions = {
                'light': np.stack([X_L, middle_X_V, middle_X_T], 1),
                'view': np.stack([middle_X_L, X_V, middle_X_T], 1),
                'time': np.stack([middle_X_L, middle_X_V, X_T], 1),
                'light_view': np.stack([X_L, X_V, middle_X_T], 1),
                'light_time': np.stack([X_L, middle_X_V, X_T], 1),
                'view_time': np.stack([middle_X_L, X_V, X_T], 1),
                'light_view_time': np.stack([X_L, X_V, X_T], 1),
            }

            for case, rendering_path in all_dimensions.items():
                print('\n --------- %s interpolation ---------' % (case))
                write_video(
                    '%s/rendered videos/rendered_%s.mp4' % (savedir, case),
                    rendering_path)
    finally:
        sess.close()
def run_training(args):
    """Train the flow network (and the albedo, where one is learnt).

    Output goes to ``<savepath>/<dataset name>/``: the best checkpoint (lowest
    epoch-averaged L1 loss so far) is written to ``trained model/model.ckpt``
    and per-epoch preview images/videos are written to ``saved training/``.
    Training stops when the epoch-averaged L1 loss drops to ``0.01`` or the
    epoch budget derived from 100000 iterations is used up. The learning
    rate is set to ``0.00005`` once half of the epoch budget has elapsed.

    Args:
        args: Parsed options. The attributes read here are ``savepath``,
            ``dataset``, ``type``, ``dim``, ``num_n``, ``nfg``,
            ``load_pretrained`` and ``lr``. The whole object is also passed
            to ``load_imgs`` and ``Blending_train``.

    Raises:
        ValueError: If ``load_imgs`` returns no training pairs.
    """
    print('---------- Perform Training ----------')

    savedir = os.path.join(args.savepath, os.path.split(args.dataset)[1])
    os.makedirs(savedir, exist_ok=True)

    # Build the checkpoint location with os.path.join so the directory
    # separator is right on every platform and matches the
    # "<savedir>/trained model/" location the test entry point reads.
    model_dir = os.path.join(savedir, "trained model")
    checkpoint_path = os.path.join(model_dir, "model.ckpt")
    for directory in (model_dir, os.path.join(savedir, "saved training")):
        if not os.path.exists(directory):
            os.mkdir(directory)
            print('creating directory %s' % (directory))

    print('XField type: %s' % (args.type))
    print('Dimension of input xfield: %s' % (args.dim))

    # loading images
    images, coordinates, all_pairs, h_res, w_res = load_imgs(args)
    if len(all_pairs) == 0:
        raise ValueError('No training pairs were loaded from %s' %
                         (args.dataset))

    dims = args.dim
    num_n = args.num_n  # number of neighbors
    min_ = np.min(coordinates)
    max_ = np.max(coordinates)

    print('\n ------- Creating the model -------')

    def count_gen_params():
        """Number of scalars in the trainable variables under gen_flows."""
        return np.sum([
            np.prod(v.get_shape().as_list())
            for v in tf.trainable_variables()
            if v.name.startswith("gen_flows")
        ])

    # batch size is num_n + 1 (number of neighbors + target)
    inputs = tf.placeholder(tf.float32, shape=[num_n + 1, 1, 1, len(dims)])

    # Jacobian network
    num_output = len(args.type) * 2
    with tf.variable_scope("gen_flows"):
        flows = Flow(inputs, h_res, w_res, num_output, args.nfg, min_, max_)

    nparams_decoder = count_gen_params()
    print('Number of learnable parameters (decoder): %d' % (nparams_decoder))

    # learnt albedo -- the albedos are initialized with constant 1.0
    index_albedo = None
    if args.type == ['light', 'view', 'time']:
        with tf.variable_scope("gen_flows"):
            # For light-view-time interpolation, we consider
            # num_views*num_times albedos
            albedos = tf.Variable(
                tf.constant(1.0, shape=[dims[1] * dims[2], h_res, w_res, 3]),
                name='albedo')
            index_albedo = tf.placeholder(tf.int32, shape=(1, ))
            albedo = tf.gather(albedos, index_albedo, 0)
        print(
            'Number of learnable parameters (%d albedos with res %d x %d ): %d'
            % (dims[1] * dims[2], h_res, w_res,
               count_gen_params() - nparams_decoder))
    elif args.type == ['light']:
        with tf.variable_scope("gen_flows"):
            # For light interpolation, we consider just one albedo
            albedo = tf.Variable(
                tf.constant(1.0, shape=[1, h_res, w_res, 3]), name='albedo')
        print(
            'Number of learnable parameters (%d albedos with res %d x %d ): %d'
            % (1, h_res, w_res, count_gen_params() - nparams_decoder))
    else:
        # For view and time interpolation, we do not train for albedo, we
        # consider it as a constant non-learnable parameter
        albedo = tf.constant(1.0, shape=[1, h_res, w_res, 3])

    Neighbors = tf.placeholder(tf.float32, shape=[num_n, h_res, w_res, 3])

    # soft blending
    interpolated = Blending_train(inputs, Neighbors, flows, albedo, h_res,
                                  w_res, args)

    Reference = tf.placeholder(tf.float32, shape=[1, h_res, w_res, 3])

    # L1 loss
    loss = tf.reduce_mean((tf.abs(interpolated - Reference)))

    gen_tvars = [
        var for var in tf.trainable_variables()
        if var.name.startswith("gen_flows")
    ]
    learning_rate = tf.placeholder(tf.float32, shape=())
    gen_optim = tf.train.AdamOptimizer(learning_rate)
    gen_grads = gen_optim.compute_gradients(loss, var_list=gen_tvars)
    gen_train = gen_optim.apply_gradients(gen_grads)

    saver = tf.train.Saver(max_to_keep=1000)

    stop_l1_thr = 0.01
    iter_end = 100000  # total number of iterations

    indices = np.arange(len(all_pairs))
    if len(indices) < 500:
        # we considered around 500 iterations per each epoch
        indices = np.repeat(indices, 500 // len(indices))

    epoch_size = len(indices)
    epoch_end = iter_end // epoch_size  # total number of epochs

    sess = tf.Session()

    def feed_for(pair):
        """Feed dict for the blending graph for one row of all_pairs."""
        feed = {
            inputs: coordinates[pair[:num_n + 1], ::],
            Neighbors: images[pair[1:num_n + 1], ::],
        }
        if index_albedo is not None:
            # The last entry of the pair is fed as the albedo index.
            feed[index_albedo] = [pair[-1]]
        return feed

    def clipped_uint8(img):
        """Clamp to [0, 1] and convert to an 8-bit image."""
        return np.uint8(np.minimum(np.maximum(img, 0.0), 1.0) * 255)

    def train_epoch(epoch, lr):
        """Run one shuffled pass over `indices`; return the mean L1 loss."""
        np.random.shuffle(indices)
        cumulative = 0
        for step in range(epoch_size):
            pair = all_pairs[indices[step], ::]
            feed = feed_for(pair)
            feed[Reference] = images[pair[:1], ::]
            feed[learning_rate] = lr
            _, l1loss = sess.run([gen_train, loss], feed_dict=feed)
            cumulative = cumulative + l1loss
            print(
                '\r Epoch %3.0d Iteration %3.0d of %3.0d Cumulative L1 loss = %3.3f'
                % (epoch, step + 1, epoch_size, cumulative),
                end=" ")
        return cumulative / epoch_size

    def fit(summary_fmt, snapshot):
        """Epoch loop: train, checkpoint on improvement, write previews."""
        lr = args.lr
        started = time.time()
        min_loss = 1000
        l1_loss_t = 1
        epoch = 0
        while l1_loss_t > stop_l1_thr and epoch <= epoch_end:
            l1_loss_t = train_epoch(epoch, lr)
            print(summary_fmt % ((time.time() - started) / 60, l1_loss_t))

            if l1_loss_t < min_loss:
                saver.save(sess, checkpoint_path)
                min_loss = l1_loss_t

            snapshot()

            epoch = epoch + 1
            if epoch == epoch_end // 2:
                lr = 0.00005

    def snapshot_light_view_time():
        """Write reference, three reconstructions and the three flow maps."""
        center = np.prod(dims) // 2
        cv2.imwrite("%s/saved training/reference.png" % (savedir),
                    np.uint8(images[center, ::] * 255))

        for offset, axis in enumerate(('light', 'view', 'time')):
            pair = all_pairs[3 * center + offset, ::]
            if offset == 0:
                # The flows are only fetched together with the first pair.
                out_img, flows_out = sess.run([interpolated, flows],
                                              feed_dict=feed_for(pair))
            else:
                out_img = sess.run(interpolated, feed_dict=feed_for(pair))

            cv2.imwrite("%s/saved training/recons_%s.png" % (savedir, axis),
                        clipped_uint8(out_img)[0, ::])

            if offset == 0:
                for k, flow_axis in enumerate(('light', 'view', 'time')):
                    flow_color = flow_vis.flow_to_color(
                        flows_out[0, :, :, 2 * k:2 * k + 2],
                        convert_to_bgr=False)
                    cv2.imwrite(
                        "%s/saved training/flow_%s.png" %
                        (savedir, flow_axis), np.uint8(flow_color))

    def snapshot_single_axis(img_mov, flow_mov):
        """Write albedo (light only), reference, reconstruction and flow."""
        if args.type == ['light']:
            albedo_out = np.minimum(np.maximum(sess.run(albedo), 0.0), 1.0)
            cv2.imwrite("%s/saved training/albedo.png" % (savedir),
                        np.uint8(albedo_out[0, :, :, :] * 255))

        center = np.prod(dims) // 2
        cv2.imwrite("%s/saved training/reference.png" % (savedir),
                    np.uint8(images[center, ::] * 255))

        pair = all_pairs[(len(all_pairs) // len(images)) * center, ::]
        out_img, flows_out = sess.run([interpolated, flows],
                                      feed_dict=feed_for(pair))
        recons = clipped_uint8(out_img)[0, ::]
        cv2.imwrite("%s/saved training/recons.png" % (savedir), recons)

        flow_color = flow_vis.flow_to_color(flows_out[0, :, :, 0:2],
                                            convert_to_bgr=False)
        cv2.imwrite("%s/saved training/flow.png" % (savedir),
                    np.uint8(flow_color))

        # One frame per epoch in each of the two progress videos.
        img_mov.write(recons)
        flow_mov.write(np.uint8(flow_color))

    try:
        sess.run(tf.global_variables_initializer())

        if args.load_pretrained:
            ckpt = tf.train.get_checkpoint_state(model_dir)
            if ckpt:
                print('\n loading pretrained model ' +
                      ckpt.model_checkpoint_path)
                saver.restore(sess, ckpt.model_checkpoint_path)

        print('------------ Start Training ------------')
        print('Starting learning rate with %0.4f' % (args.lr))

        if args.type == ['light', 'view', 'time']:
            fit(" elapsed time %3.1f m Averaged L1 loss = %3.5f ",
                snapshot_light_view_time)

        if args.type in (['view'], ['time'], ['light']):
            img_mov = cv2.VideoWriter(
                '%s/saved training/epoch_recons.mp4' % (savedir),
                cv2.VideoWriter_fourcc(*'mp4v'), 10, (w_res, h_res))
            flow_mov = cv2.VideoWriter(
                '%s/saved training/epoch_flows.mp4' % (savedir),
                cv2.VideoWriter_fourcc(*'mp4v'), 10, (w_res, h_res))
            try:
                fit(" elapsed time %3.1f m Averaged L1 loss = %3.5f",
                    lambda: snapshot_single_axis(img_mov, flow_mov))
            finally:
                # Release the writers even if training is interrupted.
                img_mov.release()
                flow_mov.release()
    finally:
        sess.close()