def process_frame(inputs):
    '''
    Processes a single depth frame

    Args:
        inputs : tuple
            image path at time t=0,
            image path at time t=1,
            image path at time t=-1,
            sparse depth path at time t=0,
            ground truth path at time t=0

    Returns:
        str : output concatenated image path at time t=0
        str : output sparse depth path at time t=0
        str : output validity map path at time t=0
        str : output ground truth path at time t=0
    '''

    image0_path, \
        image1_path, \
        image2_path, \
        sparse_depth_path, \
        ground_truth_path = inputs

    # Read images and concatenate together
    image0 = cv2.imread(image0_path)
    image1 = cv2.imread(image1_path)
    image2 = cv2.imread(image2_path)
    image = np.concatenate([image1, image0, image2], axis=1)

    _, validity_map = data_utils.load_depth_with_validity_map(sparse_depth_path)

    # Create validity map and image output path
    validity_map_output_path = sparse_depth_path \
        .replace(KITTI_DEPTH_COMPLETION_DIRPATH, KITTI_DEPTH_COMPLETION_OUTPUT_DIRPATH) \
        .replace('sparse_depth', 'validity_map')
    image_output_path = validity_map_output_path \
        .replace(os.path.join(os.sep + 'proj_depth', 'velodyne_raw'), '') \
        .replace('validity_map', 'image')

    # Create output directories
    for output_path in [image_output_path, validity_map_output_path]:
        output_dirpath = os.path.dirname(output_path)

        if not os.path.exists(output_dirpath):
            try:
                os.makedirs(output_dirpath)
            except FileExistsError:
                pass

    # Write to disk
    data_utils.save_validity_map(validity_map, validity_map_output_path)
    cv2.imwrite(image_output_path, image)

    return (image_output_path,
            sparse_depth_path,
            validity_map_output_path,
            ground_truth_path)
def process_frame(params):
    image0_path, image1_path, image2_path, \
        sparse_depth_path, semi_dense_depth_path = params

    # Read images and concatenate together
    image0 = cv2.imread(image0_path)
    image1 = cv2.imread(image1_path)
    image2 = cv2.imread(image2_path)
    image = np.concatenate([image1, image0, image2], axis=1)

    sz, vm = data_utils.load_depth_with_validity_map(sparse_depth_path)
    iz = data_utils.interpolate_depth(sz, vm)

    # Create interpolated depth, validity map, and image output paths
    interp_depth_output_path = sparse_depth_path \
        .replace(KITTI_DEPTH_COMPLETION_DIRPATH, KITTI_DEPTH_COMPLETION_OUTPUT_DIRPATH) \
        .replace('sparse_depth', 'interp_depth')
    validity_map_output_path = sparse_depth_path \
        .replace(KITTI_DEPTH_COMPLETION_DIRPATH, KITTI_DEPTH_COMPLETION_OUTPUT_DIRPATH) \
        .replace('sparse_depth', 'validity_map')
    image_output_path = validity_map_output_path \
        .replace(os.path.join(os.sep + 'proj_depth', 'velodyne_raw'), '') \
        .replace('validity_map', 'image')

    # Create output directories
    for output_path in [image_output_path, interp_depth_output_path, validity_map_output_path]:
        output_dirpath = os.path.dirname(output_path)

        if not os.path.exists(output_dirpath):
            try:
                os.makedirs(output_dirpath)
            except FileExistsError:
                pass

    # Write to disk
    data_utils.save_depth(iz, interp_depth_output_path)
    data_utils.save_validity_map(vm, validity_map_output_path)
    cv2.imwrite(image_output_path, image)

    return (image_output_path,
            sparse_depth_path,
            interp_depth_output_path,
            validity_map_output_path,
            semi_dense_depth_path)
def process_frame(args):
    image_path1, image_path0, image_path2, \
        sparse_depth_path, validity_map_path, ground_truth_path = args

    # Create image composite of triplets
    im1 = cv2.imread(image_path1)
    im0 = cv2.imread(image_path0)
    im2 = cv2.imread(image_path2)
    imc = np.concatenate([im1, im0, im2], axis=1)

    # Create interpolated depth
    sz, vm = data_utils.load_depth_with_validity_map(sparse_depth_path)
    iz = data_utils.interpolate_depth(sz, vm)

    image_ref_path = os.path.join(*image_path0.split(os.sep)[2:])
    sparse_depth_ref_path = os.path.join(*sparse_depth_path.split(os.sep)[2:])

    # Set output paths
    image_output_path = os.path.join(VOID_OUT_DIRPATH, image_ref_path)
    sparse_depth_output_path = sparse_depth_path
    interp_depth_output_path = os.path.join(VOID_OUT_DIRPATH, sparse_depth_ref_path) \
        .replace('sparse_depth', 'interp_depth')
    validity_map_output_path = validity_map_path
    ground_truth_output_path = ground_truth_path

    # Verify that all filenames match
    image_out_dirpath, image_filename = os.path.split(image_output_path)
    sparse_depth_filename = os.path.basename(sparse_depth_output_path)
    validity_map_filename = os.path.basename(validity_map_output_path)
    ground_truth_filename = os.path.basename(ground_truth_output_path)
    assert image_filename == sparse_depth_filename
    assert image_filename == validity_map_filename
    assert image_filename == ground_truth_filename

    # Write to disk
    cv2.imwrite(image_output_path, imc)
    data_utils.save_depth(iz, interp_depth_output_path)

    return (image_ref_path,
            image_output_path,
            sparse_depth_output_path,
            interp_depth_output_path,
            validity_map_output_path,
            ground_truth_output_path)
def process_frame(args):
    sparse_depth_path, validity_map_path = args

    # Create interpolated depth
    sz, vm = data_utils.load_depth_with_validity_map(sparse_depth_path)
    iz = data_utils.interpolate_depth(sz, vm)

    sparse_depth_ref_path = os.path.join(*sparse_depth_path.split(os.sep)[5:])

    # Set output paths
    sparse_depth_output_path = sparse_depth_path
    interp_depth_output_path = os.path.join(VOID_OUT_DIRPATH, sparse_depth_ref_path) \
        .replace('sparse_depth', 'interp_depth')
    validity_map_output_path = validity_map_path

    # Verify that filenames match
    sparse_depth_filename = os.path.basename(sparse_depth_output_path)
    validity_map_filename = os.path.basename(validity_map_output_path)
    assert sparse_depth_filename == validity_map_filename

    # Write to disk
    data_utils.save_depth(iz, interp_depth_output_path)

    return (sparse_depth_ref_path,
            sparse_depth_output_path,
            interp_depth_output_path,
            validity_map_output_path)
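# The process_frame variants above are written as stateless pool workers: each
# takes a single tuple of paths, writes its outputs to disk, and returns the
# output paths. A minimal sketch of how such a worker might be fanned out across
# processes; run_pool and pool_inputs (a list of argument tuples matching the
# worker's unpacking order) are illustrative assumptions, not the project's
# actual driver code.
from multiprocessing import Pool

def run_pool(pool_inputs, n_process=8):
    # Each worker returns the paths it wrote, which can then be collected
    # into train/val split files
    with Pool(processes=n_process) as pool:
        results = pool.map(process_frame, pool_inputs)
    return results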
def train(train_sparse_depth_path,
          train_ground_truth_path,
          # Validation data
          val_sparse_depth_path=None,
          val_ground_truth_path=None,
          # Dataloader settings
          n_batch=settings.N_BATCH,
          n_height=settings.N_HEIGHT,
          n_width=settings.N_WIDTH,
          min_dataset_depth=settings.MIN_DATASET_DEPTH,
          max_dataset_depth=settings.MAX_DATASET_DEPTH,
          crop_type=settings.CROP_TYPE,
          augmentation_random_horizontal_crop=False,
          augmentation_random_vertical_crop=False,
          augmentation_random_horizontal_flip=False,
          # Network architecture
          network_type=settings.NETWORK_TYPE_SCAFFNET,
          activation_func=settings.ACTIVATION_FUNC,
          n_filter_output=settings.N_FILTER_OUTPUT_SCAFFNET,
          # Spatial pyramid pooling
          pool_kernel_sizes_spp=settings.POOL_KERNEL_SIZES_SPP,
          n_convolution_spp=settings.N_CONVOLUTION_SPP,
          n_filter_spp=settings.N_FILTER_SPP,
          # Depth prediction settings
          min_predict_depth=settings.MIN_PREDICT_DEPTH,
          max_predict_depth=settings.MAX_PREDICT_DEPTH,
          # Training settings
          learning_rates=settings.LEARNING_RATES,
          learning_schedule=settings.LEARNING_SCHEDULE,
          n_epoch=settings.N_EPOCH,
          loss_func=settings.LOSS_FUNC_SCAFFNET,
          # Depth evaluation settings
          min_evaluate_depth=settings.MIN_EVALUATE_DEPTH,
          max_evaluate_depth=settings.MAX_EVALUATE_DEPTH,
          # Checkpoint settings
          n_checkpoint=settings.N_CHECKPOINT,
          n_summary=settings.N_SUMMARY,
          checkpoint_path=settings.CHECKPOINT_PATH,
          restore_path=settings.RESTORE_PATH,
          # Hardware settings
          n_thread=settings.N_THREAD):

    model_path = os.path.join(checkpoint_path, 'model.ckpt')
    event_path = os.path.join(checkpoint_path, 'events')
    log_path = os.path.join(checkpoint_path, 'results.txt')

    # Load sparse depth and ground truth paths from file for training
    train_sparse_depth_paths = data_utils.read_paths(train_sparse_depth_path)
    train_ground_truth_paths = data_utils.read_paths(train_ground_truth_path)

    n_train_sample = len(train_sparse_depth_paths)
    assert n_train_sample == len(train_ground_truth_paths)

    n_train_step = n_epoch * np.ceil(n_train_sample / n_batch).astype(np.int32)

    # Load sparse depth and ground truth paths from file for validation
    val_sparse_depth_paths = data_utils.read_paths(val_sparse_depth_path)
    val_ground_truth_paths = data_utils.read_paths(val_ground_truth_path)

    n_val_sample = len(val_sparse_depth_paths)
    assert n_val_sample == len(val_ground_truth_paths)

    # Pad validation paths based on batch size
    val_sparse_depth_paths = data_utils.pad_batch(val_sparse_depth_paths, n_batch)

    # Load validation ground truth and do center crop
    val_ground_truths = []
    for idx in range(len(val_ground_truth_paths)):
        # Load ground truth and validity map
        ground_truth, validity_map = \
            data_utils.load_depth_with_validity_map(val_ground_truth_paths[idx])

        # Get crop start and end positions
        if crop_type == 'center':
            start_height = int(float(ground_truth.shape[0] - n_height))
        elif crop_type == 'bottom':
            start_height = ground_truth.shape[0] - n_height
        else:
            start_height = 0

        end_height = n_height + start_height

        start_width = int(float(ground_truth.shape[1] - n_width) / 2.0)
        end_width = n_width + start_width

        # Concatenate ground truth and validity map together
        ground_truth = np.concatenate([
            np.expand_dims(ground_truth, axis=-1),
            np.expand_dims(validity_map, axis=-1)], axis=-1)

        # Crop ground truth
        val_ground_truths.append(
            ground_truth[start_height:end_height, start_width:end_width, :])

    val_ground_truth_paths = data_utils.pad_batch(val_ground_truth_paths, n_batch)

    with tf.Graph().as_default():
        # Set up current training step
        global_step = tf.Variable(0, trainable=False)

        # Initialize optimizer with learning schedule
        learning_schedule_steps = [
            np.int32((float(v) / n_epoch) * n_train_step) for v in learning_schedule]
        learning_rate_schedule = tf.train.piecewise_constant(
            global_step, learning_schedule_steps, learning_rates)
        optimizer = tf.train.AdamOptimizer(learning_rate_schedule)

        # Initialize dataloader
        dataloader = ScaffNetDataloader(
            shape=[n_batch, n_height, n_width, 2],
            name='scaffnet_dataloader',
            is_training=True,
            n_thread=n_thread,
            prefetch_size=(2 * n_thread))

        # Fetch the input from dataloader
        input_depth = dataloader.next_element[0]
        ground_truth = dataloader.next_element[1]

        # Build computation graph
        model = ScaffNetModel(
            input_depth,
            ground_truth,
            is_training=True,
            network_type=network_type,
            activation_func=activation_func,
            n_filter_output=n_filter_output,
            pool_kernel_sizes_spp=pool_kernel_sizes_spp,
            n_convolution_spp=n_convolution_spp,
            n_filter_spp=n_filter_spp,
            min_dataset_depth=min_dataset_depth,
            max_dataset_depth=max_dataset_depth,
            min_predict_depth=min_predict_depth,
            max_predict_depth=max_predict_depth,
            loss_func=loss_func)

        # Compute loss and gradients
        loss = model.loss
        gradients = optimizer.compute_gradients(loss)
        gradients = optimizer.apply_gradients(gradients, global_step=global_step)

        model_summary = tf.summary.merge_all()

        # Count trainable parameters
        n_parameter = 0
        for variable in tf.trainable_variables():
            n_parameter += np.array(variable.get_shape().as_list()).prod()

        # Log settings
        log('Dataloader settings:', log_path)
        log('n_batch=%d n_height=%d n_width=%d' %
            (n_batch, n_height, n_width), log_path)
        log('min_dataset_depth=%.2f max_dataset_depth=%.2f' %
            (min_dataset_depth, max_dataset_depth), log_path)
        log('crop_type=%s' % (crop_type), log_path)
        log('random_horizontal_crop=%s random_vertical_crop=%s' %
            (augmentation_random_horizontal_crop, augmentation_random_vertical_crop), log_path)
        log('random_horizontal_flip=%s' %
            (augmentation_random_horizontal_flip), log_path)
        log('', log_path)

        log('Network settings:', log_path)
        log('network_type=%s n_parameter=%d' % (network_type, n_parameter), log_path)
        log('activation_func=%s' % (activation_func), log_path)
        log('n_filter_output=%s' %
            (str(n_filter_output) if n_filter_output > 0 else 'upsample'), log_path)
        log('', log_path)

        log('Spatial pyramid pooling settings:', log_path)
        log('pool_kernel_sizes_spp=[%s]' %
            (', '.join([str(i) for i in pool_kernel_sizes_spp])), log_path)
        log('n_convolution_spp=%d n_filter_spp=%d' %
            (n_convolution_spp, n_filter_spp), log_path)
        log('', log_path)

        log('Depth prediction settings:', log_path)
        log('min_predict_depth=%.2f max_predict_depth=%.2f' %
            (min_predict_depth, max_predict_depth), log_path)
        log('', log_path)

        log('Training settings:', log_path)
        log('n_sample=%d n_epoch=%d n_step=%d' %
            (n_train_sample, n_epoch, n_train_step), log_path)
        log('loss_func=%s' % (loss_func), log_path)
        log('learning_schedule=[%s, %d (%d)]' %
            (', '.join('{} ({}) : {:.1E}'.format(s, v, r)
                for s, v, r in zip(
                    [0] + learning_schedule,
                    [0] + learning_schedule_steps,
                    learning_rates)),
             n_epoch, n_train_step), log_path)
        log('', log_path)

        log('Depth evaluation settings:', log_path)
        log('min_evaluate_depth=%.2f max_evaluate_depth=%.2f' %
            (min_evaluate_depth, max_evaluate_depth), log_path)
        log('', log_path)

        log('Checkpoint settings:', log_path)
        log('checkpoint_path=%s' % (checkpoint_path), log_path)
        log('restore_path=%s' %
            ('None' if restore_path == '' else restore_path), log_path)
        log('', log_path)

        # Initialize Tensorflow session
        config = tf.ConfigProto(allow_soft_placement=True)
        config.gpu_options.allow_growth = True
        session = tf.Session(config=config)

        # Initialize saver for storing and restoring checkpoints
        train_summary_writer = tf.summary.FileWriter(event_path + '-train', session.graph)
        val_summary_writer = tf.summary.FileWriter(event_path + '-val')
        train_saver = tf.train.Saver(max_to_keep=50)

        # Initialize all variables
        session.run(tf.global_variables_initializer())
        session.run(tf.local_variables_initializer())

        # If given, load the weights from the restore path
        if restore_path != '':
            train_saver.restore(session, restore_path)

        # Begin training
        log('Begin training...', log_path)
        start_step = global_step.eval(session=session)
        time_start = time.time()
        train_step = start_step
        step = 0

        do_center_crop = True if crop_type == 'center' else False
        do_bottom_crop = True if crop_type == 'bottom' else False

        # Shuffle data for current epoch
        train_sparse_depth_paths_epoch, \
            train_ground_truth_paths_epoch = data_utils.make_epoch(
                input_arr=[train_sparse_depth_paths, train_ground_truth_paths],
                n_batch=n_batch)

        # Feed input paths into dataloader for training
        dataloader.initialize(
            session,
            sparse_depth_paths=train_sparse_depth_paths_epoch,
            ground_truth_paths=train_ground_truth_paths_epoch,
            do_center_crop=do_center_crop,
            do_bottom_crop=do_bottom_crop,
            random_horizontal_crop=augmentation_random_horizontal_crop,
            random_vertical_crop=augmentation_random_vertical_crop,
            random_horizontal_flip=augmentation_random_horizontal_flip)

        while train_step < n_train_step:
            try:
                if train_step % n_summary == 0:
                    # Compute loss and training summary
                    _, loss_value, train_summary = session.run(
                        [gradients, loss, model_summary])

                    # Write training results to summary
                    train_summary_writer.add_summary(train_summary, global_step=train_step)
                else:
                    # Compute loss
                    _, loss_value = session.run([gradients, loss])

                if train_step and (train_step % n_checkpoint) == 0:
                    time_elapse = \
                        (time.time() - time_start) / 3600 * train_step / (train_step - start_step + 1)
                    time_remain = (n_train_step / train_step - 1) * time_elapse

                    checkpoint_log = \
                        'batch: {:>6}/{:>6} time elapsed: {:.2f}h time left: {:.2f}h \n' + \
                        'loss: {:.5f}'

                    log(checkpoint_log.format(
                        train_step, n_train_step, time_elapse, time_remain, loss_value), log_path)

                    # Feed input paths into dataloader for validation
                    dataloader.initialize(
                        session,
                        sparse_depth_paths=val_sparse_depth_paths,
                        ground_truth_paths=val_ground_truth_paths,
                        do_center_crop=do_center_crop,
                        do_bottom_crop=do_bottom_crop,
                        random_horizontal_crop=False,
                        random_vertical_crop=False,
                        random_horizontal_flip=False)

                    # Run model on validation samples
                    val_output_depths = run(
                        model,
                        session,
                        n_sample=n_val_sample,
                        summary=model_summary,
                        summary_writer=val_summary_writer,
                        step=train_step,
                        verbose=False)

                    # Run validation metrics
                    eval_utils.evaluate(
                        val_output_depths,
                        val_ground_truths,
                        train_step,
                        log_path=log_path,
                        min_evaluate_depth=min_evaluate_depth,
                        max_evaluate_depth=max_evaluate_depth)

                    # Switch back to training
                    current_sample = n_batch * (step + 1)

                    dataloader.initialize(
                        session,
                        sparse_depth_paths=train_sparse_depth_paths_epoch[current_sample:],
                        ground_truth_paths=train_ground_truth_paths_epoch[current_sample:],
                        do_center_crop=do_center_crop,
                        do_bottom_crop=do_bottom_crop,
                        random_horizontal_crop=augmentation_random_horizontal_crop,
                        random_vertical_crop=augmentation_random_vertical_crop,
                        random_horizontal_flip=augmentation_random_horizontal_flip)

                    train_saver.save(session, model_path, global_step=train_step)

                train_step += 1
                step += 1
            except tf.errors.OutOfRangeError:
                step = 0

                # Shuffle data for next epoch
                train_sparse_depth_paths_epoch, \
                    train_ground_truth_paths_epoch = data_utils.make_epoch(
                        input_arr=[train_sparse_depth_paths, train_ground_truth_paths],
                        n_batch=n_batch)

                # Feed input paths into dataloader for training
                dataloader.initialize(
                    session,
                    sparse_depth_paths=train_sparse_depth_paths_epoch,
                    ground_truth_paths=train_ground_truth_paths_epoch,
                    do_center_crop=do_center_crop,
                    do_bottom_crop=do_bottom_crop,
                    random_horizontal_crop=augmentation_random_horizontal_crop,
                    random_vertical_crop=augmentation_random_vertical_crop,
                    random_horizontal_flip=augmentation_random_horizontal_flip)

        train_saver.save(session, model_path, global_step=n_train_step)
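# The learning schedule above is specified in epochs and converted to global-step
# boundaries for tf.train.piecewise_constant. A worked example with illustrative
# numbers (not the project's defaults from settings.py):
def example_learning_schedule_steps():
    n_epoch = 30
    n_train_sample = 72000
    n_batch = 8
    learning_schedule = [18, 24]    # epoch boundaries; learning_rates holds one more entry

    n_train_step = n_epoch * int(np.ceil(n_train_sample / n_batch))    # 30 * 9000 = 270000
    steps = [int((float(v) / n_epoch) * n_train_step) for v in learning_schedule]
    return steps    # [162000, 216000]: the learning rate drops at those global steps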
if refdir == 'image':
    image = cv2.imread(path)
    image = np.concatenate([image, image, image], axis=1)
    image_output_path = path \
        .replace(KITTI_DEPTH_COMPLETION_DIRPATH, KITTI_DEPTH_COMPLETION_OUTPUT_DIRPATH)

    image_output_paths.append(image_output_path)

    if not os.path.exists(os.path.dirname(image_output_path)):
        os.makedirs(os.path.dirname(image_output_path))

    # Write to disk
    cv2.imwrite(image_output_path, image)
elif refdir == 'sparse_depth':
    # Load sparse depth and save validity map
    _, validity_map = data_utils.load_depth_with_validity_map(path)

    # Create validity map output path
    validity_map_output_path = path \
        .replace(KITTI_DEPTH_COMPLETION_DIRPATH, KITTI_DEPTH_COMPLETION_OUTPUT_DIRPATH) \
        .replace('sparse_depth', 'validity_map')

    sparse_depth_output_paths.append(path)
    validity_map_output_paths.append(validity_map_output_path)

    validity_map_output_dirpath = os.path.dirname(validity_map_output_path)
    if not os.path.exists(validity_map_output_dirpath):
        os.makedirs(validity_map_output_dirpath)

    # Write to disk
    data_utils.save_validity_map(validity_map, validity_map_output_path)
ground_truths = []
if ground_truth_available:
    ground_truth_paths = data_utils.read_paths(args.ground_truth_path)
    ground_truth_paths = ground_truth_paths[args.start_idx:args.end_idx]

    assert n_sample == len(ground_truth_paths)

    # Load ground truth
    for idx in range(n_sample):
        print('Loading {}/{} groundtruth depth maps'.format(idx + 1, n_sample), end='\r')

        ground_truth, validity_map = \
            data_utils.load_depth_with_validity_map(ground_truth_paths[idx])

        ground_truth = np.concatenate([
            np.expand_dims(ground_truth, axis=-1),
            np.expand_dims(validity_map, axis=-1)], axis=-1)

        ground_truths.append(ground_truth)

    ground_truth_paths = data_utils.pad_batch(ground_truth_paths, args.n_batch)

    print('Completed loading {} groundtruth depth maps'.format(n_sample))

'''
Build graph
'''
with tf.Graph().as_default():
# Pad all paths based on batch
im_paths = data_utils.pad_batch(im_paths, args.n_batch)
iz_paths = data_utils.pad_batch(iz_paths, args.n_batch)
vm_paths = data_utils.pad_batch(vm_paths, args.n_batch)

n_step = len(im_paths) // args.n_batch

gt_arr = []
if args.ground_truth_path != '':
    # Load ground truth
    for idx in range(n_sample):
        sys.stdout.write(
            'Loading {}/{} groundtruth depth maps \r'.format(idx + 1, n_sample))
        sys.stdout.flush()

        gt, vm = data_utils.load_depth_with_validity_map(gt_paths[idx])
        gt = np.concatenate(
            [np.expand_dims(gt, axis=-1), np.expand_dims(vm, axis=-1)], axis=-1)
        gt_arr.append(gt)

    print('Completed loading {} groundtruth depth maps'.format(n_sample))

with tf.Graph().as_default():
    # Initialize dataloader
    dataloader = DataLoader(
        shape=[args.n_batch, args.n_height, args.n_width, 3],
        name='dataloader',
        is_training=False,
        n_thread=args.n_thread,
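# Validation and test paths are padded so their count is a multiple of the batch
# size; outputs produced for the padded entries are discarded after inference.
# A plausible sketch of such a helper, assuming it repeats the final entry
# (the repo's actual data_utils.pad_batch may be implemented differently):
def pad_batch_example(paths, n_batch):
    # Repeat the last path until the list length divides evenly into batches
    n_remainder = len(paths) % n_batch
    if n_remainder != 0:
        paths = paths + [paths[-1]] * (n_batch - n_remainder)
    return paths

# e.g. 10 paths with n_batch=4 become 12 entries (3 full batches)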
def process_frame(inputs):
    '''
    Processes a single depth frame

    Args:
        inputs : tuple
            KITTI sparse depth path,
            Virtual KITTI ground truth path,
            output directory paths in order of:
                sparse depth, validity map, semi-dense depth, dense depth, ground truth

    Returns:
        str : Virtual KITTI output sparse depth path
        str : Virtual KITTI output validity map path
        str : Virtual KITTI output semi-dense depth (convex hull of sparse points) path
        str : Virtual KITTI output dense depth path (ground truth without sky)
        str : Virtual KITTI output ground truth path
    '''

    # Separate arguments into individual variables
    kitti_sparse_depth_path, vkitti_ground_truth_path, output_dirpaths = inputs

    # Extract validity map from KITTI sparse depth
    _, kitti_validity_map = data_utils.load_depth_with_validity_map(kitti_sparse_depth_path)

    # Load Virtual KITTI ground truth
    vkitti_ground_truth = cv2.imread(
        vkitti_ground_truth_path, cv2.IMREAD_ANYCOLOR | cv2.IMREAD_ANYDEPTH)

    # Convert Virtual KITTI ground truth to meters
    vkitti_ground_truth = vkitti_ground_truth / 100.0

    if kitti_validity_map.shape != vkitti_ground_truth.shape:
        # Resize KITTI validity map to VKITTI size
        kitti_validity_map = cv2.resize(
            kitti_validity_map,
            dsize=(vkitti_ground_truth.shape[1], vkitti_ground_truth.shape[0]),
            interpolation=cv2.INTER_NEAREST)

        assert np.all(np.unique(kitti_validity_map) == [0, 1])

    # Get Virtual KITTI dense depth without sky
    vkitti_validity_map = np.ones(vkitti_ground_truth.shape)
    vkitti_validity_map[vkitti_ground_truth > 600.0] = 0.0
    vkitti_dense_depth = vkitti_validity_map * vkitti_ground_truth

    # Get Virtual KITTI sparse depth
    vkitti_sparse_depth = kitti_validity_map * vkitti_dense_depth

    # Get Virtual KITTI semi-dense depth (convex hull of sparse points)
    vkitti_semi_dense_depth = \
        np.where(skmorph.convex_hull_image(kitti_validity_map), 1, 0) * vkitti_dense_depth

    # Create output filepaths
    filename = os.path.basename(vkitti_ground_truth_path)

    output_sparse_depth_dirpath, \
        output_validity_map_dirpath, \
        output_semi_dense_depth_dirpath, \
        output_dense_depth_dirpath, \
        output_ground_truth_dirpath = output_dirpaths

    output_sparse_depth_path = os.path.join(output_sparse_depth_dirpath, filename)
    output_validity_map_path = os.path.join(output_validity_map_dirpath, filename)
    output_semi_dense_depth_path = os.path.join(output_semi_dense_depth_dirpath, filename)
    output_dense_depth_path = os.path.join(output_dense_depth_dirpath, filename)
    output_ground_truth_path = os.path.join(output_ground_truth_dirpath, filename)

    # Write to disk
    data_utils.save_depth(vkitti_sparse_depth, output_sparse_depth_path)
    data_utils.save_validity_map(kitti_validity_map, output_validity_map_path)
    data_utils.save_depth(vkitti_semi_dense_depth, output_semi_dense_depth_path)
    data_utils.save_depth(vkitti_dense_depth, output_dense_depth_path)
    data_utils.save_depth(vkitti_ground_truth, output_ground_truth_path)

    return (output_sparse_depth_path,
            output_validity_map_path,
            output_semi_dense_depth_path,
            output_dense_depth_path,
            output_ground_truth_path)
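# The core of the step above is elementwise masking: the KITTI validity map
# selects which Virtual KITTI pixels survive as sparse depth, and the convex
# hull of the valid points defines the semi-dense region. A toy illustration
# of the same operations on a small array (values are made up):
def example_sparse_and_semi_dense():
    dense_depth = np.full((4, 4), 10.0)    # toy dense depth map (meters)
    validity_map = np.zeros((4, 4))        # toy LiDAR-style validity map
    validity_map[1, 1] = 1
    validity_map[2, 3] = 1
    validity_map[3, 0] = 1

    # Sparse depth: keep dense values only where the validity map is 1
    sparse_depth = validity_map * dense_depth

    # Semi-dense depth: keep dense values inside the convex hull of valid points
    hull = np.where(skmorph.convex_hull_image(validity_map.astype(bool)), 1, 0)
    semi_dense_depth = hull * dense_depth

    return sparse_depth, semi_dense_depth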
def train(train_image_path,
          train_input_depth_path,
          train_sparse_depth_path,
          train_intrinsics_path,
          # Validation data filepaths
          val_image_path=None,
          val_input_depth_path=None,
          val_sparse_depth_path=None,
          val_ground_truth_path=None,
          # Dataloader settings
          n_batch=settings.N_BATCH,
          n_height=settings.N_HEIGHT,
          n_width=settings.N_WIDTH,
          crop_type=settings.CROP_TYPE,
          augmentation_random_horizontal_crop=False,
          augmentation_random_vertical_crop=False,
          # Network settings
          network_type=settings.NETWORK_TYPE_FUSIONNET,
          image_filter_pct=settings.IMAGE_FILTER_PCT,
          depth_filter_pct=settings.DEPTH_FILTER_PCT,
          activation_func=settings.ACTIVATION_FUNC,
          # Depth prediction settings
          min_predict_depth=settings.MIN_PREDICT_DEPTH,
          max_predict_depth=settings.MAX_PREDICT_DEPTH,
          min_scale_depth=settings.MIN_SCALE_DEPTH,
          max_scale_depth=settings.MAX_SCALE_DEPTH,
          min_residual_depth=settings.MIN_RESIDUAL_DEPTH,
          max_residual_depth=settings.MAX_RESIDUAL_DEPTH,
          # Training settings
          n_epoch=settings.N_EPOCH,
          learning_rates=settings.LEARNING_RATES,
          learning_schedule=settings.LEARNING_SCHEDULE,
          # Loss function settings
          validity_map_color=settings.VALIDITY_MAP_COLOR,
          w_color=settings.W_COLOR,
          w_structure=settings.W_STRUCTURE,
          w_sparse_depth=settings.W_SPARSE_DEPTH,
          w_smoothness=settings.W_SMOOTHNESS,
          w_prior_depth=settings.W_PRIOR_DEPTH,
          residual_threshold_prior_depth=settings.RESIDUAL_THRESHOLD_PRIOR_DEPTH,
          rotation_param=settings.ROTATION_PARAM,
          # Depth evaluation settings
          min_evaluate_depth=settings.MIN_EVALUATE_DEPTH,
          max_evaluate_depth=settings.MAX_EVALUATE_DEPTH,
          # Checkpoint settings
          n_checkpoint=settings.N_CHECKPOINT,
          n_summary=settings.N_SUMMARY,
          checkpoint_path=settings.CHECKPOINT_PATH,
          restore_path=settings.RESTORE_PATH,
          # Hardware settings
          n_thread=settings.N_THREAD):

    model_path = os.path.join(checkpoint_path, 'model.ckpt')
    event_path = os.path.join(checkpoint_path, 'events')
    log_path = os.path.join(checkpoint_path, 'results.txt')

    # Load image, input depth, sparse depth, intrinsics paths from file
    train_image_paths = data_utils.read_paths(train_image_path)
    train_input_depth_paths = data_utils.read_paths(train_input_depth_path)
    train_sparse_depth_paths = data_utils.read_paths(train_sparse_depth_path)
    train_intrinsics_paths = data_utils.read_paths(train_intrinsics_path)

    n_train_sample = len(train_image_paths)
    assert n_train_sample == len(train_input_depth_paths)
    assert n_train_sample == len(train_sparse_depth_paths)
    assert n_train_sample == len(train_intrinsics_paths)

    n_train_step = n_epoch * np.ceil(n_train_sample / n_batch).astype(np.int32)

    # Load image, input depth, and sparse depth paths from file for validation
    val_image_paths = data_utils.read_paths(val_image_path)
    val_input_depth_paths = data_utils.read_paths(val_input_depth_path)
    val_sparse_depth_paths = data_utils.read_paths(val_sparse_depth_path)
    val_ground_truth_paths = data_utils.read_paths(val_ground_truth_path)

    n_val_sample = len(val_image_paths)
    assert n_val_sample == len(val_input_depth_paths)
    assert n_val_sample == len(val_sparse_depth_paths)
    assert n_val_sample == len(val_ground_truth_paths)

    val_image_paths = data_utils.pad_batch(val_image_paths, n_batch)
    val_input_depth_paths = data_utils.pad_batch(val_input_depth_paths, n_batch)
    val_sparse_depth_paths = data_utils.pad_batch(val_sparse_depth_paths, n_batch)

    # Load validation ground truth and do center crop
    val_ground_truths = []
    for idx in range(len(val_ground_truth_paths)):
        ground_truth, validity_map_ground_truth = \
            data_utils.load_depth_with_validity_map(val_ground_truth_paths[idx])

        ground_truth = np.concatenate([
            np.expand_dims(ground_truth, axis=-1),
            np.expand_dims(validity_map_ground_truth, axis=-1)], axis=-1)

        # Get start and end of crop
        if crop_type == 'center':
            start_height = int(float(ground_truth.shape[0] - n_height))
        elif crop_type == 'bottom':
            start_height = ground_truth.shape[0] - n_height
        else:
            start_height = 0

        end_height = n_height + start_height

        start_width = int(float(ground_truth.shape[1] - n_width) / 2.0)
        end_width = n_width + start_width

        ground_truth = \
            ground_truth[start_height:end_height, start_width:end_width, :]

        val_ground_truths.append(ground_truth)

    val_ground_truth_paths = data_utils.pad_batch(val_ground_truth_paths, n_batch)

    with tf.Graph().as_default():
        # Set up current training step
        global_step = tf.Variable(0, trainable=False)

        # Initialize optimizer with learning schedule
        learning_schedule_steps = [
            np.int32((float(v) / n_epoch) * n_train_step) for v in learning_schedule]
        learning_rate_schedule = tf.train.piecewise_constant(
            global_step, learning_schedule_steps, learning_rates)
        optimizer = tf.train.AdamOptimizer(learning_rate_schedule)

        # Initialize dataloader
        dataloader = FusionNetDataloader(
            shape=[n_batch, n_height, n_width, 3],
            name='fusionnet_dataloader',
            is_training=True,
            n_thread=n_thread,
            prefetch_size=2 * n_thread)

        # Fetch the input from dataloader
        image0 = dataloader.next_element[0]
        image1 = dataloader.next_element[1]
        image2 = dataloader.next_element[2]
        input_depth = dataloader.next_element[3]
        intrinsics = dataloader.next_element[4]

        # Build computation graph
        model = FusionNetModel(
            image0=image0,
            image1=image1,
            image2=image2,
            input_depth=input_depth,
            intrinsics=intrinsics,
            is_training=True,
            network_type=network_type,
            image_filter_pct=image_filter_pct,
            depth_filter_pct=depth_filter_pct,
            activation_func=activation_func,
            min_predict_depth=min_predict_depth,
            max_predict_depth=max_predict_depth,
            min_scale_depth=min_scale_depth,
            max_scale_depth=max_scale_depth,
            min_residual_depth=min_residual_depth,
            max_residual_depth=max_residual_depth,
            validity_map_color=validity_map_color,
            w_color=w_color,
            w_structure=w_structure,
            w_sparse_depth=w_sparse_depth,
            w_smoothness=w_smoothness,
            w_prior_depth=w_prior_depth,
            residual_threshold_prior_depth=residual_threshold_prior_depth,
            rotation_param=rotation_param)

        # Compute loss and gradients
        loss = model.loss
        gradients = optimizer.compute_gradients(loss)
        gradients = optimizer.apply_gradients(gradients, global_step=global_step)

        model_summary = tf.summary.merge_all()

        # Count trainable parameters
        n_parameter = 0
        for variable in tf.trainable_variables():
            n_parameter += np.array(variable.get_shape().as_list()).prod()

        # Log settings
        log('Dataloader settings:', log_path)
        log('n_batch=%d n_height=%d n_width=%d' %
            (n_batch, n_height, n_width), log_path)
        log('crop_type=%s' % (crop_type), log_path)
        log('random_horizontal_crop=%s random_vertical_crop=%s' %
            (augmentation_random_horizontal_crop, augmentation_random_vertical_crop), log_path)
        log('', log_path)

        log('Network settings:', log_path)
        log('network_type=%s n_parameter=%d' % (network_type, n_parameter), log_path)
        log('image_filter_pct=%.2f depth_filter_pct=%.2f' %
            (image_filter_pct, depth_filter_pct), log_path)
        log('activation_func=%s' % (activation_func), log_path)
        log('', log_path)

        log('Depth prediction settings:', log_path)
        log('min_predict_depth=%.2f max_predict_depth=%.2f' %
            (min_predict_depth, max_predict_depth), log_path)
        log('min_scale_depth=%.2f max_scale_depth=%.2f' %
            (min_scale_depth, max_scale_depth), log_path)
        log('min_residual_depth=%.2f max_residual_depth=%.2f' %
            (min_residual_depth, max_residual_depth), log_path)
        log('', log_path)

        log('Training settings:', log_path)
        log('n_sample=%d n_epoch=%d n_step=%d' %
            (n_train_sample, n_epoch, n_train_step), log_path)
        log('learning_schedule=[%s, %d (%d)]' %
            (', '.join('{} ({}) : {:.1E}'.format(s, v, r)
                for s, v, r in zip(
                    [0] + learning_schedule,
                    [0] + learning_schedule_steps,
                    learning_rates)),
             n_epoch, n_train_step), log_path)
        log('validity_map_color=%s' % (validity_map_color), log_path)
        log('w_color=%.2f w_structure=%.2f w_sparse_depth=%.2f' %
            (w_color, w_structure, w_sparse_depth), log_path)
        log('w_smoothness=%.3f w_prior_depth=%.2f' %
            (w_smoothness, w_prior_depth), log_path)
        log('residual_threshold_prior_depth=%.2f' %
            (residual_threshold_prior_depth), log_path)
        log('rotation_param=%s' % (rotation_param), log_path)
        log('', log_path)

        log('Depth evaluation settings:', log_path)
        log('min_evaluate_depth=%.2f max_evaluate_depth=%.2f' %
            (min_evaluate_depth, max_evaluate_depth), log_path)
        log('', log_path)

        log('Checkpoint settings:', log_path)
        log('checkpoint_path=%s' % (checkpoint_path), log_path)
        log('restore_path=%s' %
            ('None' if restore_path == '' else restore_path), log_path)
        log('', log_path)

        # Initialize Tensorflow session
        config = tf.ConfigProto(allow_soft_placement=True)
        config.gpu_options.allow_growth = True
        session = tf.Session(config=config)

        # Initialize saver for storing and restoring checkpoints
        train_summary_writer = tf.summary.FileWriter(event_path + '-train', session.graph)
        val_summary_writer = tf.summary.FileWriter(event_path + '-val')
        train_saver = tf.train.Saver(max_to_keep=50)

        # Initialize all variables
        session.run(tf.global_variables_initializer())
        session.run(tf.local_variables_initializer())

        # If given, load the weights from the restore path
        if restore_path != '':
            import tensorflow.contrib.slim as slim

            vars_to_restore_fusionnet = tf.get_collection(
                tf.GraphKeys.GLOBAL_VARIABLES, scope=network_type)
            init_assign_op_fusionnet, init_feed_dict_fusionnet = slim.assign_from_checkpoint(
                restore_path, vars_to_restore_fusionnet, ignore_missing_vars=True)
            session.run(init_assign_op_fusionnet, init_feed_dict_fusionnet)

            vars_to_restore_posenet = tf.get_collection(
                tf.GraphKeys.GLOBAL_VARIABLES, scope='posenet')
            init_assign_op_posenet, init_feed_dict_posenet = slim.assign_from_checkpoint(
                restore_path, vars_to_restore_posenet, ignore_missing_vars=True)
            session.run(init_assign_op_posenet, init_feed_dict_posenet)

        # Begin training
        log('Begin training...', log_path)
        start_step = global_step.eval(session=session)
        time_start = time.time()
        train_step = start_step
        step = 0

        do_center_crop = True if crop_type == 'center' else False
        do_bottom_crop = True if crop_type == 'bottom' else False

        # Shuffle data for current epoch
        train_image_paths_epoch, \
            train_input_depth_paths_epoch, \
            train_sparse_depth_paths_epoch, \
            train_intrinsics_paths_epoch = data_utils.make_epoch(
                input_arr=[
                    train_image_paths,
                    train_input_depth_paths,
                    train_sparse_depth_paths,
                    train_intrinsics_paths],
                n_batch=n_batch)

        # Feed input paths into dataloader for training
        dataloader.initialize(
            session,
            image_paths=train_image_paths_epoch,
            input_depth_paths=train_input_depth_paths_epoch,
            sparse_depth_paths=train_sparse_depth_paths_epoch,
            intrinsics_paths=train_intrinsics_paths_epoch,
            do_center_crop=do_center_crop,
            do_bottom_crop=do_bottom_crop,
            random_horizontal_crop=augmentation_random_horizontal_crop,
            random_vertical_crop=augmentation_random_vertical_crop)

        while train_step < n_train_step:
            try:
                if train_step % n_summary == 0:
                    # Compute loss and training summary
                    _, loss_value, train_summary = session.run(
                        [gradients, loss, model_summary])

                    # Write training summary
                    train_summary_writer.add_summary(train_summary, global_step=train_step)
                else:
                    # Compute loss
                    _, loss_value = session.run([gradients, loss])

                if train_step and (train_step % n_checkpoint) == 0:
                    time_elapse = \
                        (time.time() - time_start) / 3600 * train_step / (train_step - start_step + 1)
                    time_remain = (n_train_step / train_step - 1) * time_elapse

                    checkpoint_log = \
                        'batch: {:>6}/{:>6} time elapsed: {:.2f}h time left: {:.2f}h \n' + \
                        'loss: {:.5f}'

                    log(checkpoint_log.format(
                        train_step, n_train_step, time_elapse, time_remain, loss_value), log_path)

                    # Feed input paths into dataloader for validation
                    dataloader.initialize(
                        session,
                        image_paths=val_image_paths,
                        input_depth_paths=val_input_depth_paths,
                        sparse_depth_paths=val_sparse_depth_paths,
                        intrinsics_paths=train_intrinsics_paths[0:len(val_image_paths)],
                        do_center_crop=do_center_crop,
                        do_bottom_crop=do_bottom_crop,
                        random_horizontal_crop=False,
                        random_vertical_crop=False)

                    # Run model on validation samples
                    val_output_depths = run(
                        model,
                        session,
                        n_sample=n_val_sample,
                        summary=model_summary,
                        summary_writer=val_summary_writer,
                        step=train_step,
                        verbose=False)

                    # Run validation metrics
                    eval_utils.evaluate(
                        val_output_depths,
                        val_ground_truths,
                        train_step,
                        log_path=log_path,
                        min_evaluate_depth=min_evaluate_depth,
                        max_evaluate_depth=max_evaluate_depth)

                    # Switch back to training
                    current_sample = n_batch * (step + 1)

                    dataloader.initialize(
                        session,
                        image_paths=train_image_paths_epoch[current_sample:],
                        input_depth_paths=train_input_depth_paths_epoch[current_sample:],
                        sparse_depth_paths=train_sparse_depth_paths_epoch[current_sample:],
                        intrinsics_paths=train_intrinsics_paths_epoch[current_sample:],
                        do_center_crop=do_center_crop,
                        do_bottom_crop=do_bottom_crop,
                        random_horizontal_crop=augmentation_random_horizontal_crop,
                        random_vertical_crop=augmentation_random_vertical_crop)

                    train_saver.save(session, model_path, global_step=train_step)

                train_step += 1
                step += 1
            except tf.errors.OutOfRangeError:
                step = 0

                # Shuffle data for next epoch
                train_image_paths_epoch, \
                    train_input_depth_paths_epoch, \
                    train_sparse_depth_paths_epoch, \
                    train_intrinsics_paths_epoch = data_utils.make_epoch(
                        input_arr=[
                            train_image_paths,
                            train_input_depth_paths,
                            train_sparse_depth_paths,
                            train_intrinsics_paths],
                        n_batch=n_batch)

                # Feed input paths into dataloader for training
                dataloader.initialize(
                    session,
                    image_paths=train_image_paths_epoch,
                    input_depth_paths=train_input_depth_paths_epoch,
                    sparse_depth_paths=train_sparse_depth_paths_epoch,
                    intrinsics_paths=train_intrinsics_paths_epoch,
                    do_center_crop=do_center_crop,
                    do_bottom_crop=do_bottom_crop,
                    random_horizontal_crop=augmentation_random_horizontal_crop,
                    random_vertical_crop=augmentation_random_vertical_crop)

        train_saver.save(session, model_path, global_step=n_train_step)
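# For reference, one way this train() entry point might be invoked; the path
# files and hyperparameter values below are placeholders, not the project's
# actual training lists or defaults:
if __name__ == '__main__':
    train(
        train_image_path='training/train_image.txt',
        train_input_depth_path='training/train_input_depth.txt',
        train_sparse_depth_path='training/train_sparse_depth.txt',
        train_intrinsics_path='training/train_intrinsics.txt',
        val_image_path='validation/val_image.txt',
        val_input_depth_path='validation/val_input_depth.txt',
        val_sparse_depth_path='validation/val_sparse_depth.txt',
        val_ground_truth_path='validation/val_ground_truth.txt',
        n_batch=8,
        n_height=320,
        n_width=768,
        checkpoint_path='trained_fusionnet/checkpoint')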