def process(args):
    N = len(args.im1)
    assert N == len(args.im2) and N == len(args.out)

    device = torch.device("cuda:0")

    # Load the pretrained FlowNet2 checkpoint and move the model to the GPU.
    Flownet = FlowNet2(args)
    print(f"Loading pretrained model from '{args.pretrained_model_flownet2}'.")
    flownet2_ckpt = torch.load(args.pretrained_model_flownet2)
    Flownet.load_state_dict(flownet2_ckpt["state_dict"])
    Flownet.to(device)
    Flownet.eval()

    for im1, im2, out in zip(args.im1, args.im2, args.out):
        # Skip pairs whose flow has already been computed.
        if os.path.isfile(out):
            continue

        flow = infer(args, Flownet, device, im1, im2)
        if args.size:
            flow = resize_flow(flow, args.size)
        print(flow.shape)

        os.makedirs(os.path.dirname(out), exist_ok=True)
        save_raw_float32_image(out, flow)

        if args.visualize:
            vis = flow_to_image(flow)
            cv2.imwrite(os.path.splitext(out)[0] + ".png", vis)
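
# Hedged usage sketch for process(): builds a minimal argparse.Namespace with
# the attributes the function reads (im1/im2/out lists, size, visualize, and
# the FlowNet2 checkpoint path). All file names below are hypothetical, and
# the FlowNet2 constructor fields (fp16, rgb_max) are assumptions based on
# the flownet2-pytorch interface, not prescribed by the code above.
if __name__ == "__main__":
    from argparse import Namespace

    example_args = Namespace(
        im1=["frames/frame_000000.png"],
        im2=["frames/frame_000001.png"],
        out=["flow/frame_000000_000001.raw"],
        pretrained_model_flownet2="checkpoints/FlowNet2_checkpoint.pth.tar",
        size=None,        # keep the native flow resolution
        visualize=True,   # also write a color-coded .png next to the .raw
        fp16=False,       # assumed FlowNet2 constructor flag
        rgb_max=255.0,    # assumed FlowNet2 constructor flag
    )
    process(example_args)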
def downscale_frames(
    self, subdir, max_size, ext, align=16, full_subdir="color_full"
):
    full_dir = pjoin(self.path, full_subdir)
    down_dir = pjoin(self.path, subdir)

    mkdir_ifnotexists(down_dir)

    if self.check_frames(down_dir, ext):
        # Frames are already extracted and checked OK.
        return

    for i in range(self.frame_count):
        full_file = "%s/frame_%06d.png" % (full_dir, i)
        down_file = ("%s/frame_%06d." + ext) % (down_dir, i)
        suppress_messages = (i > 0)
        image = image_io.load_image(
            full_file, max_size=max_size, align=align,
            suppress_messages=suppress_messages
        )
        image = image[..., ::-1]  # Channel swizzle (RGB to BGR).

        if ext == "raw":
            image_io.save_raw_float32_image(down_file, image)
        else:
            cv2.imwrite(down_file, image * 255)

    self.check_frames(down_dir, ext)
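
# Hedged usage sketch for downscale_frames(): `video` stands in for an
# instance of the owning class, with `path` and `frame_count` set and
# full-resolution frames already extracted to color_full/. The subdirectory
# names mirror conventions used elsewhere in this pipeline; max_size=384 is
# an illustrative value, not a requirement.
# video.downscale_frames("color_down", max_size=384, ext="raw")      # float32 .raw
# video.downscale_frames("color_down_png", max_size=384, ext="png")  # 8-bit .png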
def save_depth(self, dir: str = None, frames=None):
    if dir is None:
        dir = self.out_dir
    if frames is None:
        frames = self.frames

    color_fmt = pjoin(self.base_dir, "color_down", "frame_{:06d}.raw")
    depth_dir = pjoin(dir, "depth")
    depth_fmt = pjoin(depth_dir, "frame_{:06d}")

    dataset = VideoFrameDataset(color_fmt, frames)
    data_loader = DataLoader(
        dataset, batch_size=1, shuffle=False, num_workers=4
    )

    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True
    self.model.eval()

    os.makedirs(depth_dir, exist_ok=True)
    for data in data_loader:
        data = to_device(data)
        stacked_images, metadata = data
        frame_id = metadata["frame_id"][0]

        depth = self.model.forward(stacked_images, metadata)

        # Store inverse depth as a raw float32 image.
        depth = depth.detach().cpu().numpy().squeeze()
        inv_depth = 1.0 / depth
        image_io.save_raw_float32_image(
            depth_fmt.format(frame_id) + ".raw", inv_depth
        )

    with SuppressedStdout():
        visualization.visualize_depth_dir(depth_dir, depth_dir, force=True)
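
# Hedged sketch: reading back one of the inverse-depth .raw files written by
# save_depth() and converting it to depth. Assumes image_io's
# load_raw_float32_image is the inverse of save_raw_float32_image (as used
# elsewhere in this codebase); the path is hypothetical.
def load_depth_example(path="results/depth/frame_000000.raw"):
    inv_depth = image_io.load_raw_float32_image(path)  # float32 HxW array
    depth = 1.0 / inv_depth  # save_depth() stores inverse depth
    return depth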
def eval_and_save(self, criterion, data_loader, suf) -> Dict[str, torch.Tensor]:
    """
    Note this function assumes the structure of the data produced by data_loader
    """
    N = len(data_loader.dataset)
    loss_dict = {}
    saved_frames = set()
    total_index = 0
    max_frame_index = 0
    all_pairs = []

    for _, data in zip(range(N), data_loader):
        data = to_device(data)
        stacked_img, metadata = data

        with torch.no_grad():
            depth = self.model(stacked_img, metadata)

        batch_indices = (
            metadata["geometry_consistency"]["indices"].cpu().numpy().tolist()
        )

        # Update the maximum frame index and pairs list.
        max_frame_index = max(max_frame_index, max(itertools.chain(*batch_indices)))
        all_pairs += batch_indices

        # Compute and store losses.
        _, loss_meta = criterion(
            depth, metadata, parameters=self.model.parameters(),
        )

        for loss_name, losses in loss_meta.items():
            if loss_name not in loss_dict:
                loss_dict[loss_name] = {}
            for indices, loss in zip(batch_indices, losses):
                loss_dict[loss_name][str(indices)] = loss.item()

        # Save depth maps.
        inv_depths_batch = 1.0 / depth.cpu().detach().numpy()
        if self.vis_depth_scale is None:
            # Single scale for the whole dataset.
            self.vis_depth_scale = inv_depths_batch.max()

        for inv_depths, indices in zip(inv_depths_batch, batch_indices):
            for inv_depth, index in zip(inv_depths, indices):
                # Only save frames not saved before.
                if index in saved_frames:
                    continue
                saved_frames.add(index)
                fn_pre = pjoin(
                    self.out_dir, "eval", "depth_{:06d}{}".format(index, suf)
                )
                image_io.save_raw_float32_image(fn_pre + ".raw", inv_depth)

                inv_depth_vis = visualization.visualize_depth(
                    inv_depth, depth_min=0, depth_max=self.vis_depth_scale
                )
                cv2.imwrite(fn_pre + ".png", inv_depth_vis)
                total_index += 1

    loss_meta = {
        loss_name: torch.tensor(tuple(loss.values()))
        for loss_name, loss in loss_dict.items()
    }
    loss_dict["mean"] = {k: v.mean().item() for k, v in loss_meta.items()}

    with open(pjoin(self.out_dir, "eval", "loss{}.json".format(suf)), "w") as f:
        json.dump(loss_dict, f)

    # Print verbose summary to stdout.
    index_width = int(math.ceil(math.log10(max_frame_index)))
    loss_names = list(loss_dict.keys())
    loss_names.remove("mean")
    loss_format = {}
    for name in loss_names:
        max_value = max(loss_dict[name].values())
        width = math.ceil(math.log10(max_value))
        loss_format[name] = f"{width+7}.6f"

    for pair in sorted(all_pairs):
        line = f"({pair[0]:{index_width}d}, {pair[1]:{index_width}d}): "
        line += ", ".join(
            [f"{name}: {loss_dict[name][str(pair)]:{loss_format[name]}}"
             for name in loss_names]
        )
        print(line)

    print("Mean: " + " " * (2 * index_width) + ", ".join(
        [f"{name}: {loss_dict['mean'][name]:{loss_format[name]}}"
         for name in loss_names]
    ))

    return loss_meta
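
# Hedged sketch: inspecting the loss{suf}.json file written by eval_and_save().
# Each top-level key is a loss name mapping "(i, j)" frame-pair strings to
# values, plus a "mean" entry holding per-loss averages. The out_dir and suf
# values are hypothetical.
def print_mean_losses(out_dir, suf=""):
    import json
    from os.path import join as pjoin

    with open(pjoin(out_dir, "eval", f"loss{suf}.json")) as f:
        loss_dict = json.load(f)
    for name, value in loss_dict["mean"].items():
        print(f"{name}: {value:.6f}")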
def calibrate_scale(video, out_dir, frame_range, args):
    # COLMAP reconstruction.
    print_banner("COLMAP reconstruction")

    colmap_dir = pjoin(video.path, 'colmap_dense')
    src_meta_file = pjoin(colmap_dir, "metadata.npz")

    # Set up the COLMAP processor.
    colmap = COLMAPProcessor(args.colmap_bin_path)
    dense_dir = colmap.dense_dir(colmap_dir, 0)

    # Check whether the metadata file already exists.
    if os.path.isfile(src_meta_file):
        print("Checked metadata file exists.")
    else:
        color_dir = prepare_colmap_color(video)

        # Run COLMAP if the dense reconstruction is missing or incomplete.
        if not colmap.check_dense(
            dense_dir, color_dir, valid_ratio=args.dense_frame_ratio
        ):
            path_args = [color_dir, colmap_dir]
            mask_path = pjoin(video.path, 'colmap_mask')
            if os.path.isdir(mask_path):
                path_args.extend(['--mask_path', mask_path])
            colmap_args = COLMAPParams().parse_args(
                args=path_args + ['--dense_max_size', str(args.size)],
                namespace=args
            )
            colmap.process(colmap_args)

        intrinsics, extrinsics = make_camera_params_from_colmap(
            video.path, colmap.sparse_dir(colmap_dir, 0)
        )
        np.savez(src_meta_file, intrinsics=intrinsics, extrinsics=extrinsics)

    # Convert COLMAP dense depth maps to .raw file format.
    print_banner("Convert COLMAP depth maps")

    converted_depth_fmt = pjoin(
        video.path, "depth_colmap_dense", "depth", "frame_{:06d}.raw"
    )

    # Convert COLMAP dense depths to .raw images.
    converted_depth_dir = os.path.dirname(converted_depth_fmt)
    dense_depth_dir = pjoin(dense_dir, "stereo", "depth_maps")
    frames = frame_range.frames()
    if not check_frames(
        dense_depth_dir, colmap.dense_depth_suffix(), converted_depth_dir, "",
        frame_names={f"frame_{i:06d}.png" for i in frames},
    ):
        os.makedirs(converted_depth_dir, exist_ok=True)
        colmap_depth_fmt = pjoin(
            dense_depth_dir, "frame_{:06d}.png" + colmap.dense_depth_suffix()
        )
        for i in frames:
            colmap_depth_fn = colmap_depth_fmt.format(i)
            if not os.path.isfile(colmap_depth_fn):
                logging.warning(
                    "[SCALE CALIBRATION] %s does not exist.", colmap_depth_fn
                )
                continue
            cmp_depth = load_colmap.read_array(colmap_depth_fn)
            inv_cmp_depth = 1.0 / cmp_depth
            ix = np.isinf(inv_cmp_depth) | (inv_cmp_depth < 0)
            inv_cmp_depth[ix] = float("nan")
            image_io.save_raw_float32_image(
                converted_depth_fmt.format(i), inv_cmp_depth
            )
        with SuppressedStdout():
            visualization.visualize_depth_dir(
                converted_depth_dir, converted_depth_dir,
                force=True, min_percentile=0, max_percentile=99,
            )

    # Compute scaled depth maps.
    print_banner("Compute per-frame scales")

    scaled_depth_dir = pjoin(out_dir, "depth_scaled_by_colmap_dense", "depth")
    scaled_depth_fmt = pjoin(scaled_depth_dir, "frame_{:06d}.raw")
    scales_file = pjoin(out_dir, "scales.csv")
    src_depth_fmt = pjoin(
        video.path, f"depth_{args.model_type}", "depth", "frame_{:06d}.raw"
    )
    frames = frame_range.frames()

    if (check_frames(
            converted_depth_dir, ".png",
            os.path.dirname(scaled_depth_fmt), ".raw")
            and os.path.isfile(scales_file)):
        src_to_colmap_scales = np.loadtxt(scales_file, delimiter=',')
        assert src_to_colmap_scales.shape[0] >= len(frames) * args.dense_frame_ratio \
            and src_to_colmap_scales.shape[1] == 2, \
            (f"scales shape is {src_to_colmap_scales.shape} does not match "
             + f"({len(frames)}, 2) with threshold {args.dense_frame_ratio}")
        print("Existing scales file loaded.")
    else:
        # Scale depth maps.
        os.makedirs(scaled_depth_dir, exist_ok=True)
        src_to_colmap_scales_map = {}

        for i in frames:
            converted_depth_fn = converted_depth_fmt.format(i)
            if not os.path.isfile(converted_depth_fn):
                logging.warning(
                    "[SCALE CALIBRATION] %s does not exist", converted_depth_fn
                )
                continue

            # Compute scale for init depths.
            inv_cmp_depth = image_io.load_raw_float32_image(converted_depth_fn)
            inv_src_depth = image_io.load_raw_float32_image(
                src_depth_fmt.format(i)
            )
            # src_depth * scale = (1 / inv_src_depth) * scale == cmp_depth
            inv_cmp_depth = cv2.resize(
                inv_cmp_depth, inv_src_depth.shape[:2][::-1],
                interpolation=cv2.INTER_NEAREST
            )
            ix = np.isfinite(inv_cmp_depth)

            if np.sum(ix) / ix.size < args.dense_pixel_ratio:
                # Not enough pixels are valid, so the frame is invalid.
                continue

            scales = (inv_src_depth / inv_cmp_depth)[ix]
            scale = np.median(scales)
            print(f"Scale[{i}]: median={scale}, std={np.std(scales)}")
            # scale = np.median(inv_depth) * np.median(cmp_depth)
            src_to_colmap_scales_map[i] = float(scale)
            scaled_inv_src_depth = inv_src_depth / scale
            image_io.save_raw_float32_image(
                scaled_depth_fmt.format(i), scaled_inv_src_depth
            )
        with SuppressedStdout():
            visualization.visualize_depth_dir(
                scaled_depth_dir, scaled_depth_dir, force=True
            )

        # Write scales.csv.
        xs = sorted(src_to_colmap_scales_map.keys())
        ys = [src_to_colmap_scales_map[x] for x in xs]
        src_to_colmap_scales = np.stack((np.array(xs), np.array(ys)), axis=-1)
        np.savetxt(scales_file, src_to_colmap_scales, delimiter=",")

    valid_frames = {int(s) for s in src_to_colmap_scales[:, 0]}

    # Scale the extrinsics' translations.
    scaled_meta_file = pjoin(out_dir, "metadata_scaled.npz")
    if os.path.isfile(scaled_meta_file):
        print("Scaled metadata file exists.")
    else:
        scales = src_to_colmap_scales[:, 1]
        mean_scale = scales.mean()
        print(f"[scales] mean={mean_scale}, std={np.std(scales)}")

        with np.load(src_meta_file) as meta_colmap:
            intrinsics = meta_colmap["intrinsics"]
            extrinsics = meta_colmap["extrinsics"]

        extrinsics[..., -1] /= mean_scale
        np.savez(
            scaled_meta_file,
            intrinsics=intrinsics,
            extrinsics=extrinsics,
            scales=src_to_colmap_scales,
        )

        color_fmt = pjoin(video.path, "color_down", "frame_{:06d}.raw")
        vis_dir = pjoin(out_dir, "vis_calibration_dense")
        visualize_all_calibration(
            extrinsics, intrinsics, scaled_depth_fmt,
            color_fmt, frame_range, vis_dir,
        )

    return valid_frames
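
# Hedged sketch of the per-frame scale estimate used above, in isolation:
# given inverse source depth and inverse COLMAP depth on the same grid, the
# per-pixel ratio inv_src / inv_cmp equals cmp_depth / src_depth, and its
# median is the scalar that maps the source depths onto COLMAP's scale.
# Function name and inputs below are synthetic.
import numpy as np

def estimate_scale(inv_src_depth, inv_cmp_depth, min_valid_ratio=0.2):
    ix = np.isfinite(inv_cmp_depth)
    if ix.sum() / ix.size < min_valid_ratio:
        return None  # too few valid COLMAP pixels; frame is unusable
    return float(np.median((inv_src_depth / inv_cmp_depth)[ix]))

# Synthetic check: COLMAP depth = 2 * source depth => scale == 2.
rng = np.random.default_rng(0)
src = rng.uniform(1.0, 10.0, size=(48, 64))
assert abs(estimate_scale(1.0 / src, 1.0 / (2.0 * src)) - 2.0) < 1e-6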