Example #1
    def __init__(self, options):
        self.opt = options

        # checking height and width are multiples of 32
        assert self.opt.height % 32 == 0, "'height' must be a multiple of 32"
        assert self.opt.width % 32 == 0, "'width' must be a multiple of 32"

        self.models = {}
        self.parameters_to_train = []

        self.device = "cuda"

        if self.opt.threeinput:
            self.models["encoder"] = networks.ResnetEncoder(
                self.opt.num_layers, pretrained=True, num_input_channels=8)
        else:
            self.models["encoder"] = networks.ResnetEncoder(
                self.opt.num_layers, pretrained=True)
        self.models["encoder"].to(self.device)

        self.parameters_to_train += list(self.models["encoder"].parameters())
        self.models["depth"] = DepthDecoder(self.models["encoder"].num_ch_enc,
                                            num_output_channels=1)
        self.models["depth"].to(self.device)
        self.parameters_to_train += list(self.models["depth"].parameters())
        self.models["confidence"] = ConfidenceDecoder(
            self.models["encoder"].num_ch_enc, num_output_channels=1)
        self.models["confidence"].to(self.device)

        self.set_dataset()

        self.depth_metric_names = [
            "de/abs_rel", "de/sq_rel", "de/rms", "de/log_rms", "da/a1",
            "da/a2", "da/a3"
        ]

        print("Using split:\t  ", self.opt.split)

        self.load_model()

        self.MIN_DEPTH = 1e-3
        self.MAX_DEPTH = 80

        self.STEREO_SCALE_FACTOR = 5.4

        self.sfnormOptimizer = SurfaceNormalOptimizer(
            height=self.opt.crph,
            width=self.opt.crpw,
            batch_size=self.opt.batch_size).cuda()

        self.variancebar = torch.from_numpy(variancebar).cuda().float()
        self.variancebar[self.variancebar > 0] = self.variancebar[
            self.variancebar > 0] / self.opt.variancefold

        from integrationModule import IntegrationFunction
        self.integrationFunction = IntegrationFunction.apply
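
A side note (not from the source): the multiple-of-32 assertions above exist because the ResNet encoder halves the resolution five times, so feed sizes must be divisible by 2**5 = 32. A minimal sketch for snapping arbitrary sizes:

def snap_to_multiple_of_32(height, width):
    # Round down to the nearest multiple of 32 so the encoder's five
    # stride-2 stages produce integer feature-map sizes.
    return (height // 32) * 32, (width // 32) * 32

print(snap_to_multiple_of_32(375, 1242))  # (352, 1216), e.g. a KITTI-sized input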
Example #2
    def __init__(self):
        super().__init__()
        self.num_classes = 9
        self.cam_thresh = 0.9
        self.seg_ratio = 0.2
        self.encoder = networks.ResnetEncoder(18, False)
        self.encoder_cls = networks.ResnetEncoder(18, True)

        model_name = 'mono+stereo_640x192'
        model_path = os.path.join("models", model_name)
        print("-> Loading model from ", model_path)
        encoder_path = os.path.join(model_path, "encoder.pth")
        depth_decoder_path = os.path.join(model_path, "depth.pth")
        loaded_dict_enc = torch.load(encoder_path, map_location=device)
        feed_height = loaded_dict_enc['height']
        feed_width = loaded_dict_enc['width']
        filtered_dict_enc = {k: v for k, v in loaded_dict_enc.items() if k in self.encoder.state_dict()}
        self.encoder.load_state_dict(filtered_dict_enc)
        # self.encoder.eval();
        
        self.decoder = networks.DepthDecoder(
            num_ch_enc=self.encoder.num_ch_enc, scales=range(5),
            num_output_channels=self.num_classes)
        
        self.depth_decoder = DebugDepthDecoder(
            num_ch_enc=self.encoder.num_ch_enc, scales=range(4))
        loaded_dict = torch.load(depth_decoder_path, map_location=device)
        self.depth_decoder.load_state_dict(loaded_dict)
        # self.decoder.eval();

        self.encoder_out_channels = self.encoder.num_ch_enc[-1]
        self.classifer_conv1 = nn.Conv2d(self.encoder_out_channels, 1024, 3, padding=1)
        self.classifer_drop1 = nn.Dropout2d(p=0.5)
        self.classifer_conv2 = nn.Conv2d(1024, self.num_classes, 1, bias=False)
        # self.seg_conv1 = nn.Conv2d(self.encoder_out_channels, self.num_classes, 1)
        self.train_loss = []
        self.loss_decomp = {'cls': [], 'seed': [], 'dCRF': []}
        self.val_loss = []
        self.test_loss = []
        self.rloss_weight = 2e-9 #2e-9
        self.rloss_scale = 0.5
        self.rloss_sig_rgb = 15
        self.rloss_sig_xy = 100
        self.lr = 1e-3
        self.densecrflosslayer = DenseCRFLoss(weight=self.rloss_weight, 
                                              sigma_rgb=self.rloss_sig_rgb, 
                                              sigma_xy=self.rloss_sig_xy, 
                                              scale_factor=self.rloss_scale)
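
A plausible forward pass for the classification head defined above (a sketch, not from the source; it assumes features is the list of encoder feature maps and that the head produces class activation maps, as cam_thresh suggests):

import torch.nn.functional as F

def classify(self, features):
    x = F.relu(self.classifer_conv1(features[-1]))       # (N, 1024, H/32, W/32)
    x = self.classifer_drop1(x)
    cams = self.classifer_conv2(x)                       # per-class activation maps
    logits = F.adaptive_avg_pool2d(cams, 1).flatten(1)   # image-level class scores
    return logits, cams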
Example #3
    def monodepth2_init(self, args):
        self.encoder_path = args.encoder_path
        self.depth_decoder_path = args.depth_decoder_path


        # encoder init
        self.encoder = networks.ResnetEncoder(18, False)
        self.loaded_dict_enc = torch.load(self.encoder_path, map_location=self.device)

        self.feed_height = self.loaded_dict_enc['height']
        self.feed_width = self.loaded_dict_enc['width']
        self.filtered_dict_enc = {k: v for k, v in self.loaded_dict_enc.items() if k in self.encoder.state_dict()}
        self.encoder.load_state_dict(self.filtered_dict_enc)
        self.encoder.to(self.device)
        self.encoder.eval()

        # decoder

        self.depth_decoder = networks.DepthDecoder2([64, 64, 128, 256, 512])
        self.loaded_dict_dec = torch.load(self.depth_decoder_path, map_location=self.device)
        self.filtered_dict_dec = {k: v for k, v in self.loaded_dict_dec.items() if k in self.depth_decoder.state_dict()}

        self.depth_decoder.load_state_dict(self.filtered_dict_dec)
        self.depth_decoder.to(self.device)
        self.depth_decoder.eval()

        ## inputs size

        if args.feed_height and args.feed_width:
            self.feed_height = args.feed_height
            self.feed_width = args.feed_width
        else:
            self.feed_height = self.loaded_dict_enc['height']
            self.feed_width = self.loaded_dict_enc['width']
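
The dictionary filtering above recurs in nearly every example here: monodepth2-style encoder checkpoints store the training resolution ('height', 'width') alongside the weights, and load_state_dict would reject those extra keys. A minimal sketch of the pattern in isolation, assuming encoder was constructed as above:

import torch

loaded = torch.load("encoder.pth", map_location="cpu")         # illustrative path
feed_height, feed_width = loaded['height'], loaded['width']    # stored metadata
weights_only = {k: v for k, v in loaded.items() if k in encoder.state_dict()}
encoder.load_state_dict(weights_only)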
Example #4
    def __init__(self, options):
        self.opt = options

        # checking height and width are multiples of 32
        assert self.opt.height % 32 == 0, "'height' must be a multiple of 32"
        assert self.opt.width % 32 == 0, "'width' must be a multiple of 32"

        self.models = {}
        self.device = "cuda"

        self.depthmodels = {}
        self.depthmodels["depthencoder"] = networks.ResnetEncoder(
            self.opt.num_layers, pretrained=False, num_input_channels=3)
        self.depthmodels["depthdecoder"] = DepthDecoder(
            self.depthmodels["depthencoder"].num_ch_enc, num_output_channels=1)
        self.depthmodels["depthencoder"].to(self.device)
        self.depthmodels["depthdecoder"].to(self.device)
        self.load_model(weightFolder=self.opt.load_depthweights_folder,
                        encoderName='depthencoder',
                        decoderName='depthdecoder',
                        encoder=self.depthmodels["depthencoder"],
                        decoder=self.depthmodels["depthdecoder"])
        for m in self.depthmodels.values():
            m.eval()

        print("Training is using:\t", self.device)

        self.set_dataset()

        self.MIN_DEPTH = 1e-3
        self.MAX_DEPTH = 80

        self.STEREO_SCALE_FACTOR = 5.4
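
Since the depth networks here serve as a frozen teacher, it may also be worth disabling gradients; this is an addition, not in the source. eval() only switches batch-norm and dropout to inference mode and does not stop backprop:

def freeze(module):
    module.eval()
    for p in module.parameters():
        p.requires_grad = False  # keep the teacher out of the autograd graph

# e.g. inside __init__: for m in self.depthmodels.values(): freeze(m)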
Example #5
    def __init__(self, options):
        self.opt = options
        self.log_path = os.path.join(self.opt.log_dir, self.opt.model_name)

        # checking height and width are multiples of 32
        assert self.opt.height % 32 == 0, "'height' must be a multiple of 32"
        assert self.opt.width % 32 == 0, "'width' must be a multiple of 32"

        self.models = {}
        self.parameters_to_train = []

        self.device = "cuda"

        self.models["encoder"] = networks.ResnetEncoder(self.opt.num_layers,
                                                        pretrained=True)
        self.models["encoder"].to(self.device)
        self.parameters_to_train += list(self.models["encoder"].parameters())
        self.models["depth"] = DepthDecoder(self.models["encoder"].num_ch_enc,
                                            num_output_channels=2)
        self.models["depth"].to(self.device)
        self.parameters_to_train += list(self.models["depth"].parameters())
        self.model_optimizer = optim.Adam(self.parameters_to_train,
                                          self.opt.learning_rate)
        self.model_lr_scheduler = optim.lr_scheduler.StepLR(
            self.model_optimizer, self.opt.scheduler_step_size, 0.1)

        print("Training model named:\t", self.opt.model_name)
        print("Models and tensorboard events files are saved to:\t",
              self.opt.log_dir)
        print("Training is using:\t", self.device)

        self.set_dataset()
        self.writers = {}
        for mode in ["train", "val"]:
            self.writers[mode] = SummaryWriter(
                os.path.join(self.log_path, mode))

        self.depth_metric_names = [
            "de/abs_rel", "de/sq_rel", "de/rms", "de/log_rms", "da/a1",
            "da/a2", "da/a3"
        ]

        print("Using split:\t  ", self.opt.split)
        print("There are {:d} training items and {:d} validation items".format(
            self.train_num, self.val_num))

        if self.opt.load_weights_folder is not None:
            self.load_model()

        self.save_opts()

        self.MIN_DEPTH = 1e-3
        self.MAX_DEPTH = 80

        self.best_abs = 1e10

        self.sfnormOptimizer = SurfaceNormalOptimizer(
            height=self.opt.height,
            width=self.opt.width,
            batch_size=self.opt.batch_size).cuda()
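
A hedged sketch of how the optimizer and scheduler above are usually driven in monodepth2-style trainers; process_batch, num_epochs and train_loader are assumed names, not from this snippet:

for epoch in range(self.opt.num_epochs):
    for inputs in self.train_loader:
        outputs, losses = self.process_batch(inputs)  # hypothetical forward + loss
        self.model_optimizer.zero_grad()
        losses["loss"].backward()
        self.model_optimizer.step()
    self.model_lr_scheduler.step()  # decays the LR by 0.1 every scheduler_step_size epochs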
Example #6
    def __init__(self, options):
        self.opt = options

        # checking height and width are multiples of 32
        assert self.opt.height % 32 == 0, "'height' must be a multiple of 32"
        assert self.opt.width % 32 == 0, "'width' must be a multiple of 32"

        self.models = {}

        self.device = "cuda"

        self.models["encoder_norm"] = networks.ResnetEncoder(
            self.opt.num_layers, pretrained=True)
        self.models["encoder_norm"].to(self.device)
        self.models["norm"] = networks.DepthDecoder(
            self.models["encoder_norm"].num_ch_enc, num_output_channels=2)
        self.models["norm"].to(self.device)

        self.set_dataset()

        self.load_model()

        self.crph = 365
        self.crpw = 1220

        os.makedirs(self.opt.output_path, exist_ok=True)
        self.dirmapping = {'l': 'image_02', 'r': 'image_03'}
Example #7
    def __init__(self, options):
        self.opt = options

        # checking height and width are multiples of 32
        assert self.opt.height % 32 == 0, "'height' must be a multiple of 32"
        assert self.opt.width % 32 == 0, "'width' must be a multiple of 32"

        self.models = {}
        self.parameters_to_train = []

        self.device = "cuda"

        self.models["encoder_norm"] = networks.ResnetEncoder(self.opt.num_layers, pretrained=True)
        self.models["encoder_norm"].to(self.device)
        self.models["norm"] = DepthDecoder(self.models["encoder_norm"].num_ch_enc, num_output_channels=3)
        self.models["norm"].to(self.device)

        self.set_dataset()

        self.depth_metric_names = ["de/abs_rel", "de/sq_rel", "de/rms", "de/log_rms", "da/a1", "da/a2", "da/a3"]

        self.MIN_DEPTH = 1e-3
        self.MAX_DEPTH = 80

        self.minabsrel = 1e10
        self.maxa1 = -1e10

        self.STEREO_SCALE_FACTOR = 5.4

        self.sfnormOptimizer = SurfaceNormalOptimizer(height=self.opt.crph, width=self.opt.crpw, batch_size=self.opt.batch_size).cuda()
Example #8
    def __init__(self, lr=7e-3, encoder_depth=18):
        super().__init__()
        self.num_classes = 9
        self.encoder = networks.ResnetEncoder(encoder_depth, True)
        self.decoder = networks.DepthDecoder(
            num_ch_enc=self.encoder.num_ch_enc,
            scales=range(5),
            num_output_channels=self.num_classes)

        self.depth_decoder = DebugDepthDecoder(
            num_ch_enc=self.encoder.num_ch_enc, scales=range(4))
        # loaded_dict = torch.load(depth_decoder_path, map_location=device)
        # self.depth_decoder.load_state_dict(loaded_dict)
        # self.decoder.eval();

        self.encoder_out_channels = self.encoder.num_ch_enc[-1]
        self.train_loss = []
        self.loss_decomp = {'seed': [], 'dCRF': []}
        self.val_loss = []
        self.test_loss = []
        self.rloss_weight = 2e-9  #2e-9
        self.rloss_scale = 0.5
        self.rloss_sig_rgb = 15
        self.rloss_sig_xy = 100
        self.lr = lr
        self.densecrflosslayer = DenseCRFLoss(weight=self.rloss_weight,
                                              sigma_rgb=self.rloss_sig_rgb,
                                              sigma_xy=self.rloss_sig_xy,
                                              scale_factor=self.rloss_scale)
Example #9
def loadModel(model_name, epoch_num):
    # Set up network and load weights
    model_path = join(abspath('./logs'), model_name)
    opts_path = join(model_path, 'models/opt.json')
    weights_path = join(model_path, 'models', 'weights_{}'.format(epoch_num))

    # Load pretrained model options
    with open(opts_path, 'r') as f:
        opts = json.load(f)
    encoder = networks.ResnetEncoder(opts['num_layers'], False)
    depth_decoder = networks.DepthDecoder(num_ch_enc=encoder.num_ch_enc, scales=opts['scales'])
    encoder_path = join(weights_path, 'encoder.pth')
    depth_decoder_path = join(weights_path, 'depth.pth')

    # Load encoder network with weights. Verify encoder architecture
    loaded_dict_enc = torch.load(encoder_path)
    filtered_dict_enc = {k: v for k, v in loaded_dict_enc.items() if k in encoder.state_dict()}
    encoder.load_state_dict(filtered_dict_enc)

    # Load depth decoder network with weights
    loaded_dict = torch.load(depth_decoder_path)
    depth_decoder.load_state_dict(loaded_dict)

    # Set to eval mode on GPU
    encoder.cuda()
    depth_decoder.cuda()
    encoder.eval()
    depth_decoder.eval()

    return encoder, depth_decoder, opts
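
Usage sketch (the arguments and the input tensor are illustrative, not from the source):

encoder, depth_decoder, opts = loadModel('mono_model', 19)
with torch.no_grad():
    features = encoder(image_tensor.cuda())  # image_tensor: (1, 3, H, W), H and W multiples of 32
    disp = depth_decoder(features)[("disp", 0)]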
Example #10
def LoadDepthModels(image_name, image):
    modelsDict = {
        'CAM_BACK.jpeg': 'monoback',
        'CAM_FRONT.jpeg': 'monofront',
        'CAM_FRONT_LEFT.jpeg': 'monofrontleft',
        'CAM_FRONT_RIGHT.jpeg': 'monofrontright',
        'CAM_BACK_LEFT.jpeg': 'monobackleft',
        'CAM_BACK_RIGHT.jpeg': 'monobackright'
    }
    model_path = os.path.join("models", modelsDict[image_name])
    #print("-> Loading model from ", model_path)
    encoder_path = os.path.join(model_path, "encoder.pth")
    depth_decoder_path = os.path.join(model_path, "depth.pth")
    #device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    device = "cuda"

    # LOADING PRETRAINED MODEL
    #print("Loading pretrained encoder")
    encoder = networks.ResnetEncoder(18, False)
    loaded_dict_enc = torch.load(encoder_path)

    # extract the height and width of image that this model was trained with
    feed_height = loaded_dict_enc['height']
    feed_width = loaded_dict_enc['width']
    filtered_dict_enc = {
        k: v
        for k, v in loaded_dict_enc.items() if k in encoder.state_dict()
    }
    encoder.load_state_dict(filtered_dict_enc)
    encoder.to(device)
    encoder.eval()

    #print("Loading pretrained decoder")
    depth_decoder = networks.DepthDecoder(num_ch_enc=encoder.num_ch_enc,
                                          scales=range(4))

    loaded_dict = torch.load(depth_decoder_path)
    depth_decoder.load_state_dict(loaded_dict)

    depth_decoder.to(device)
    depth_decoder.eval()

    original_width, original_height = image.size
    input_image = image.resize((feed_width, feed_height), Image.LANCZOS)
    input_image = transforms.ToTensor()(input_image).unsqueeze(0)

    # Run inference without tracking gradients
    with torch.no_grad():
        input_image = input_image.to(device)
        features = encoder(input_image)
        outputs = depth_decoder(features)

        disp = outputs[("disp", 0)]
        disp_resized = torch.nn.functional.interpolate(
            disp, (original_height, original_width),
            mode="bilinear",
            align_corners=False)
        _, depth = disp_to_depth(disp_resized, 0.1, 100)
    return depth.squeeze(0)
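
For reference, disp_to_depth converts the network's sigmoid output to depth; the sketch below matches the implementation in the official monodepth2 layers.py (both return values are used at different points in these examples):

def disp_to_depth(disp, min_depth, max_depth):
    min_disp = 1 / max_depth
    max_disp = 1 / min_depth
    scaled_disp = min_disp + (max_disp - min_disp) * disp  # disp in [0, 1]
    depth = 1 / scaled_disp                                # depth in [min_depth, max_depth]
    return scaled_disp, depth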
Example #11
def getMonoDepth(input_image):
    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")
    loc=baseLoc+'monodepth2/'

    model_path = os.path.join(loc+"models", 'mono+stereo_640x192')
    encoder_path = os.path.join(model_path, "encoder.pth")
    depth_decoder_path = os.path.join(model_path, "depth.pth")

    # LOADING PRETRAINED MODEL
    encoder = networks.ResnetEncoder(18, False)
    loaded_dict_enc = torch.load(encoder_path, map_location=device)

    # extract the height and width of image that this model was trained with
    feed_height = loaded_dict_enc['height']
    feed_width = loaded_dict_enc['width']
    filtered_dict_enc = {k: v for k, v in loaded_dict_enc.items() if k in encoder.state_dict()}
    encoder.load_state_dict(filtered_dict_enc)
    encoder.to(device)
    encoder.eval()

    depth_decoder = networks.DepthDecoder(num_ch_enc=encoder.num_ch_enc, scales=range(4))

    loaded_dict = torch.load(depth_decoder_path, map_location=device)
    depth_decoder.load_state_dict(loaded_dict)

    depth_decoder.to(device)
    depth_decoder.eval()

    with torch.no_grad():
        input_image = pil.fromarray(input_image)
        # input_image = pil.open(image_path).convert('RGB')
        original_width, original_height = input_image.size
        input_image = input_image.resize((feed_width, feed_height), pil.LANCZOS)
        input_image = transforms.ToTensor()(input_image).unsqueeze(0)

        # PREDICTION
        input_image = input_image.to(device)
        features = encoder(input_image)
        outputs = depth_decoder(features)

        disp = outputs[("disp", 0)]
        disp_resized = torch.nn.functional.interpolate(
            disp, (original_height, original_width), mode="bilinear", align_corners=False)

        # Saving colormapped depth image, clipping at the 95th percentile
        disp_resized_np = disp_resized.squeeze().cpu().numpy()
        vmax = np.percentile(disp_resized_np, 95)
        vmin = disp_resized_np.min()
        disp_norm = np.clip((disp_resized_np - vmin) / (vmax - vmin), 0, 1)
        disp_uint8 = (255 * disp_norm).astype(np.uint8)
        colormapped_im = cv2.applyColorMap(disp_uint8, cv2.COLORMAP_HOT)
        colormapped_im = cv2.cvtColor(colormapped_im, cv2.COLOR_BGR2RGB)
    return colormapped_im
Example #12
def depth_Estimation(args):
	model_name = args.model_name
	#Setting up the network
	print("Loading model....")
	download_model_if_doesnt_exist(model_name)
	encoder_path = os.path.join("models", model_name, "encoder.pth")
	depth_decoder_path = os.path.join("models", model_name, "depth.pth")

	# LOADING PRETRAINED MODEL
	encoder = networks.ResnetEncoder(18, False)
	depth_decoder = networks.DepthDecoder(num_ch_enc=encoder.num_ch_enc, scales=range(4))

	loaded_dict_enc = torch.load(encoder_path, map_location='cpu')
	filtered_dict_enc = {k: v for k, v in loaded_dict_enc.items() if k in encoder.state_dict()}
	encoder.load_state_dict(filtered_dict_enc)

	loaded_dict = torch.load(depth_decoder_path, map_location='cpu')
	depth_decoder.load_state_dict(loaded_dict)

	encoder.eval()
	depth_decoder.eval()

	#Loading image
	print("Loading image....")
	image_path = args.image_path
	input_image = pil.open(image_path).convert('RGB')
	original_width, original_height = input_image.size
	feed_height = loaded_dict_enc['height']
	feed_width = loaded_dict_enc['width']
	input_image_resized = input_image.resize((feed_width, feed_height), pil.LANCZOS)

	input_image_pytorch = transforms.ToTensor()(input_image_resized).unsqueeze(0)
	input_npy = input_image_pytorch.squeeze().cpu().numpy()


	#prediction of disparity image
	with torch.no_grad():
		features = encoder(input_image_pytorch)
		outputs = depth_decoder(features)
		disp = outputs[("disp", 0)]

	# Scaling for the given resolution
	disp_resized = torch.nn.functional.interpolate(
		disp, (original_height, original_width), mode="bilinear",
		align_corners=False)  # interpolate to the original image resolution
	disp_resized_np = disp_resized.squeeze().cpu().numpy()  # convert the PyTorch tensor to a numpy array

	print("resized disp " + str(disp_resized_np.shape))
	print("Range of depth in image")
	scaled, dep = disp_to_depth(disp_resized_np, 0.1, 1000)  # map disparity to depth in the range 0.1 to 1000 units
	print("min->" + str(dep.min()) + " max->" + str(dep.max()))

	#Preview of the rgb and Depth images
	rgb = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)
	depth = dep.reshape((rgb.shape[0], rgb.shape[1]), order='C')
	plot(rgb,depth)

	return rgb,depth
Example #13
def network_define(opt, data_path, height, width):
    opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder)

    assert os.path.isdir(opt.load_weights_folder), \
        "Cannot find a folder at {}".format(opt.load_weights_folder)

    print("-> Loading weights from {}".format(opt.load_weights_folder))

    filenames = readlines(os.path.join(splits_dir, opt.eval_split, split_file))
    encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth")
    decoder_path = os.path.join(opt.load_weights_folder, "depth.pth")

    encoder_dict = torch.load(encoder_path,
                              map_location=torch.device("cuda:1"))

    if opt.dataset_val[0] == "kitti":
        dataset = datasets.KITTIRAWDataset(data_path,
                                           filenames,
                                           height,
                                           width, [0],
                                           4,
                                           is_train=False)
    elif opt.dataset_val[0] == "vkitti":
        dataset = datasets.VKITTIDataset(data_path,
                                         filenames,
                                         height,
                                         width, [0],
                                         4,
                                         is_train=False)
    else:
        raise ValueError("Unknown dataset: {}".format(opt.dataset_val[0]))
    # The default collate_fn would fail because samples have variable length,
    # so a custom collate function is used with batch size 1.
    dataloader = DataLoader(
        dataset,
        1,
        shuffle=False,
        num_workers=opt.num_workers,
        pin_memory=True,
        drop_last=False,
        collate_fn=my_collate_fn
    )

    encoder = networks.ResnetEncoder(opt.num_layers, False)
    depth_decoder = networks.DepthDecoder(encoder.num_ch_enc)

    model_dict = encoder.state_dict()
    encoder.load_state_dict(
        {k: v
         for k, v in encoder_dict.items() if k in model_dict})
    depth_decoder.load_state_dict(
        torch.load(decoder_path, map_location=torch.device("cuda:1")))

    encoder.cuda(1)
    encoder.eval()
    depth_decoder.cuda(1)
    depth_decoder.eval()

    return encoder, depth_decoder, dataloader, filenames
Example #14
    def initUi(self):
        self.num=0
        self.timer_camera = QTimer()  # timer controlling the video display frame rate
        self.timer_camera2 = QTimer()
        self.resize(640, 480)
        self.setWindowTitle("test_simple")
        self.centralwidget = QWidget()
        self.label_show_camera = QLabel(self.centralwidget)  # label that displays the video
        self.label_show_camera.setFixedSize(640, 480)
        self.label_show_camera.setGeometry(0, 0, 640, 480)
        self.setCentralWidget(self.centralwidget)
        self.timer_camera.timeout.connect(self.__show_camera__)
        self.timer_camera2.timeout.connect(self.__show_rate__)

        self.picture_path = "./picture/test.jpg"
        #self.encoder_path = "/home/wang/models/mono+stereo_640x192/encoder.pth"
        #self.depth_decoder_path = "/home/wang/models/mono+stereo_640x192/depth.pth"

        self.encoder_path = "/home/roit/models/monodepth2_official/mono_640x192/encoder.pth"
        self.depth_decoder_path = "/home/roit/models/monodepth2_official/mono_640x192/depth.pth"

        if torch.cuda.is_available():
            self.device = torch.device("cuda")
        else:
            self.device = torch.device("cpu")

        print("-> device:",self.device)

        self.encoder = networks.ResnetEncoder(18, False)
        self.loaded_dict_enc = torch.load(self.encoder_path, map_location=self.device)

        self.feed_height = self.loaded_dict_enc['height']
        self.feed_width = self.loaded_dict_enc['width']
        self.filtered_dict_enc = {k: v for k, v in self.loaded_dict_enc.items() if k in self.encoder.state_dict()}
        self.encoder.load_state_dict(self.filtered_dict_enc)
        self.encoder.to(self.device)
        self.encoder.eval()

        # decoder
        self.depth_decoder = networks.DepthDecoder2([64, 64, 128, 256, 512])
        self.loaded_dict_dec = torch.load(self.depth_decoder_path, map_location=self.device)
        self.filtered_dict_dec = {k: v for k, v in self.loaded_dict_dec.items() if k in self.depth_decoder.state_dict()}

        self.depth_decoder.load_state_dict(self.filtered_dict_dec)
        self.depth_decoder.to(self.device)
        self.depth_decoder.eval()

        #
        self.paths = [self.picture_path]
        self.output_directory = os.path.dirname(self.picture_path)

        self.timer_camera.start(10)
        self.timer_camera2.start(1000)
Example #15
def convert_pretrained(model, model_path, example_img, save_enc_name,
                       save_dec_name):
    # Pretrained Weights
    encoder_path = os.path.join(model_path, model, "encoder.pth")
    depth_decoder_path = os.path.join(model_path, model, "depth.pth")

    # Model Architecture
    encoder = networks.ResnetEncoder(18, False)
    depth_decoder = networks.DepthDecoder(num_ch_enc=encoder.num_ch_enc,
                                          scales=range(4))

    # Load pretrained weights into model
    try:
        loaded_dict_enc = torch.load(encoder_path, map_location='cpu')
    except FileNotFoundError as err:
        print("{} Cannot load encoder file {}".format(err, encoder_path))
        raise  # without re-raising, loaded_dict_enc would be unbound below

    filtered_dict_enc = {
        k: v
        for k, v in loaded_dict_enc.items() if k in encoder.state_dict()
    }
    encoder.load_state_dict(filtered_dict_enc)

    try:
        loaded_dict = torch.load(depth_decoder_path, map_location='cpu')
    except FileNotFoundError as err:
        print("{} Cannot load decoder file {}".format(err, depth_decoder_path))
        raise  # without re-raising, loaded_dict would be unbound below

    depth_decoder.load_state_dict(loaded_dict)

    # Set to Eval mode
    encoder.eval()
    depth_decoder.eval()

    # Forward
    image = load_example_image(example_img, loaded_dict_enc)
    with torch.no_grad():
        # Encoder
        gt_features = encoder.forward_original(image)
        features = encoder(image)
        verify_encoder(features, gt_features)

        # Decoder
        gt_outputs = depth_decoder.forward_original(features)
        outputs = depth_decoder(*features)
        verify_decoder(outputs, gt_outputs)

    # JIT Trace
    encoder_module = torch.jit.trace(encoder, image)
    depth_decoder_module = torch.jit.trace(depth_decoder, features)

    # Serialize & Save
    t_encoder_path = os.path.join(model_path, model, save_enc_name)
    t_depth_decoder_path = os.path.join(model_path, model, save_dec_name)
    encoder_module.save(t_encoder_path)
    depth_decoder_module.save(t_depth_decoder_path)
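
Usage sketch: the serialized TorchScript modules can later be reloaded without the Python network definitions (paths as saved above; image is illustrative):

traced_encoder = torch.jit.load(t_encoder_path)
traced_decoder = torch.jit.load(t_depth_decoder_path)
with torch.no_grad():
    outputs = traced_decoder(*traced_encoder(image))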
Example #16
def load_encoder(encoder_path):
    encoder = networks.ResnetEncoder(18, False)
    loaded_dict_enc = torch.load(encoder_path, map_location=device)
    feed_height = loaded_dict_enc['height']
    feed_width = loaded_dict_enc['width']
    filtered_dict_enc = {k: v for k, v in loaded_dict_enc.items() if k in encoder.state_dict()}
    encoder.load_state_dict(filtered_dict_enc)
    encoder.to(device)
    encoder.eval()
    return encoder, feed_height, feed_width
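
A companion sketch, not in the source, mirroring load_encoder with the decoder-loading pattern used elsewhere in these examples:

def load_decoder(decoder_path, encoder):
    depth_decoder = networks.DepthDecoder(num_ch_enc=encoder.num_ch_enc, scales=range(4))
    loaded_dict = torch.load(decoder_path, map_location=device)
    depth_decoder.load_state_dict(loaded_dict)
    depth_decoder.to(device)
    depth_decoder.eval()
    return depth_decoder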
Example #17
    def __init__(self, lr=7e-3, batch_size=1, width=640, height=192):
        super().__init__()
        self.num_classes = 9
        self.model = DeepLab(num_classes=self.num_classes)

        self.depth_encoder = networks.ResnetEncoder(18, True)
        self.depth_decoder = networks.DepthDecoder(
            num_ch_enc=self.depth_encoder.num_ch_enc, scales=range(4))

        model_name = 'mono+stereo_640x192'
        model_path = os.path.join("models", "monodepth2_weights", model_name)
        encoder_path = os.path.join(model_path, "encoder.pth")
        depth_decoder_path = os.path.join(model_path, "depth.pth")
        loaded_dict_enc = torch.load(encoder_path, map_location=device)
        feed_height = loaded_dict_enc['height']
        feed_width = loaded_dict_enc['width']
        filtered_dict_enc = {
            k: v
            for k, v in loaded_dict_enc.items()
            if k in self.depth_encoder.state_dict()
        }
        self.depth_encoder.load_state_dict(filtered_dict_enc)
        self.depth_encoder.eval()
        loaded_dict = torch.load(depth_decoder_path, map_location=device)
        self.depth_decoder.load_state_dict(loaded_dict)
        self.depth_decoder.eval()

        self.train_loss = []
        self.loss_decomp = {'seed': [], 'dCRF': [], 'proj': []}
        self.val_loss = []
        self.test_loss = []
        self.rloss_weight = 2e-9  #2e-9
        self.rloss_scale = 1
        self.rloss_sig_rgb = 25
        self.rloss_sig_xy = 30
        self.ploss_weight = 0.5
        self.lr = lr
        self.width = width
        self.height = height
        self.densecrflosslayer = DenseCRFLoss(weight=1,
                                              sigma_rgb=self.rloss_sig_rgb,
                                              sigma_xy=self.rloss_sig_xy,
                                              scale_factor=self.rloss_scale)
        self.backproject_depth = BackprojectDepth(batch_size, height, width)
        self.project_3d = Project3D(batch_size, height, width)
        self.ssim = SSIM()
        self.no_ssim = True
        self.use_depth_rloss = True
Example #18
    def __init__(self, cls_model, lr=7e-3, encoder_depth=18):
        super().__init__()
        self.num_classes = 9
        self.encoder = networks.ResnetEncoder(encoder_depth, False)
        self.cls_model = cls_model
        self.cls_model.eval()

        model_name = 'mono+stereo_640x192'
        model_path = os.path.join("models", model_name)
        print("-> Loading model from ", model_path)
        encoder_path = os.path.join(model_path, "encoder.pth")
        depth_decoder_path = os.path.join(model_path, "depth.pth")
        loaded_dict_enc = torch.load(encoder_path, map_location=device)
        feed_height = loaded_dict_enc['height']
        feed_width = loaded_dict_enc['width']
        filtered_dict_enc = {
            k: v
            for k, v in loaded_dict_enc.items()
            if k in self.encoder.state_dict()
        }
        self.encoder.load_state_dict(filtered_dict_enc)
        # self.encoder.eval();

        self.decoder = networks.DepthDecoder(
            num_ch_enc=self.encoder.num_ch_enc,
            scales=range(5),
            num_output_channels=self.num_classes)

        self.depth_decoder = DebugDepthDecoder(
            num_ch_enc=self.encoder.num_ch_enc, scales=range(4))
        loaded_dict = torch.load(depth_decoder_path, map_location=device)
        self.depth_decoder.load_state_dict(loaded_dict)
        # self.decoder.eval();

        self.encoder_out_channels = self.encoder.num_ch_enc[-1]
        self.train_loss = []
        self.loss_decomp = {'seed': [], 'dCRF': []}
        self.val_loss = []
        self.test_loss = []
        self.rloss_weight = 2e-9  #2e-9
        self.rloss_scale = 0.5
        self.rloss_sig_rgb = 15
        self.rloss_sig_xy = 100
        self.lr = lr
        self.densecrflosslayer = DenseCRFLoss(weight=self.rloss_weight,
                                              sigma_rgb=self.rloss_sig_rgb,
                                              sigma_xy=self.rloss_sig_xy,
                                              scale_factor=self.rloss_scale)
Example #19
    def __init__(self):
        super().__init__()
        self.num_classes = 9
        self.cam_thresh = 0.9
        self.encoder_cls = networks.ResnetEncoder(18, True)

        self.encoder_out_channels = self.encoder_cls.num_ch_enc[-1]
        self.classifer_conv1 = nn.Conv2d(self.encoder_out_channels,
                                         1024,
                                         3,
                                         padding=1)
        self.classifer_drop1 = nn.Dropout2d(p=0.5)
        self.classifer_conv2 = nn.Conv2d(1024, self.num_classes, 1, bias=False)
        self.train_loss = []
        self.val_loss = []
        self.test_loss = []
        self.lr = 1e-3
Example #20
def prepare_model_for_test(opt):
    opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder)
    print("-> Loading weights from {}".format(opt.load_weights_folder))
    pose_encoder_path = os.path.join(opt.load_weights_folder,
                                     "pose_encoder.pth")
    pose_decoder_path = os.path.join(opt.load_weights_folder, "pose.pth")

    pose_encoder = networks.ResnetEncoder(opt.num_layers, False, 2)
    pose_decoder = networks.PoseDecoder(pose_encoder.num_ch_enc, 1, 2)

    pose_encoder.load_state_dict(torch.load(pose_encoder_path))
    pose_decoder.load_state_dict(torch.load(pose_decoder_path))

    pose_encoder.cuda().eval()
    pose_decoder.cuda().eval()

    return pose_encoder, pose_decoder
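
Usage sketch (the frame tensors are illustrative): the pose network takes two frames concatenated along the channel axis, which is why the encoder above is built with two input images; compare the evaluation loop in Example #30 below.

pose_encoder, pose_decoder = prepare_model_for_test(opt)
with torch.no_grad():
    pair = torch.cat([frame_0, frame_1], 1)  # (1, 6, H, W)
    axisangle, translation = pose_decoder([pose_encoder(pair)])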
Example #21
    def __init__(self, *args, **kwargs):
        super(Monodepth2, self).__init__(*args, **kwargs)
        self.load_dict = False


        if self.running_on == 'pc':
            #mono_1024x320
            #mono_640x192
            self.encoder_path = "/home/roit/models/monodepth2_official/mono_1024x320/encoder.pth"
            self.depth_decoder_path = "/home/roit/models/monodepth2_official/mono_1024x320/depth.pth"
        elif self.running_on == 'Xavier':
            self.encoder_path = "/home/wang/970evop1/models/mono_640x192/encoder.pth"
            self.depth_decoder_path = "/home/wang/970evop1/models/mono_640x192/depth.pth"


        # encoder init
        self.encoder = networks.ResnetEncoder(18, False)

        if self.load_dict:
            self.loaded_dict_enc = torch.load(self.encoder_path, map_location=self.device)
            self.filtered_dict_enc = {k: v for k, v in self.loaded_dict_enc.items() if k in self.encoder.state_dict()}
            self.encoder.load_state_dict(self.filtered_dict_enc)
        self.encoder.to(self.device)
        self.encoder.eval()

        # decoder
        if self.name == 'arch2':
            self.decoder = networks.DepthDecoder2([64, 64, 128, 256, 512])
        else:
            self.decoder = networks.DepthDecoder([64, 64, 128, 256, 512])

        if self.load_dict:
            self.loaded_dict_dec = torch.load(self.depth_decoder_path, map_location=self.device)
            self.filtered_dict_dec = {k: v for k, v in self.loaded_dict_dec.items() if k in self.decoder.state_dict()}
            self.decoder.load_state_dict(self.filtered_dict_dec)
        self.decoder.to(self.device)
        self.decoder.eval()

        ## inputs size
        # self.feed_height = self.loaded_dict_enc['height']
        # self.feed_width = self.loaded_dict_enc['width']


        print('==> model name:{}\nfeed_height:{}\nfeed_width:{}\n'.format(self.name, self.feed_height, self.feed_width))
Example #22
    def val(self):
        """Validate the model on a single minibatch
        """
        self.set_eval()

        modelnames = ['intconstrainWallPoleBs', 'intconstrainWallPole', 'intconstrainWall', 'intconstrainPole', 'intconstrainPole2', 'intconstrainPole3', 'intconstrainPole4', 'intconstrainPole5']

        for modelname in modelnames:
            vlsroot = os.path.join(self.opt.vlsfold, modelname)
            os.makedirs(vlsroot, exist_ok=True)

            self.models["encoder"] = networks.ResnetEncoder(self.opt.num_layers, pretrained=True)
            self.models["encoder"].to(self.device)
            self.models["depth"] = DepthDecoder(self.models["encoder"].num_ch_enc, num_output_channels=1)
            self.models["depth"].to(self.device)

            models_to_load = ['encoder', 'depth']
            for n in models_to_load:
                path = os.path.join(self.opt.load_weights_folder, modelname, 'models', 'best_a1_models', "{}.pth".format(n))
                model_dict = self.models[n].state_dict()
                pretrained_dict = torch.load(path)
                pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
                model_dict.update(pretrained_dict)
                self.models[n].load_state_dict(model_dict)

            with torch.no_grad():
                for batch_idx, inputs in enumerate(self.val_loader):
                    for key, ipt in inputs.items():
                        if not key == 'tag':
                            inputs[key] = ipt.to(self.device)

                    _, _, gt_height, gt_width = inputs['depthgt'].shape

                    outputs_depth = self.models['depth'](self.models['encoder'](inputs['color']))
                    _, pred_depth = disp_to_depth(outputs_depth[("disp", 0)], self.opt.min_depth, self.opt.max_depth)
                    pred_depth = pred_depth * self.STEREO_SCALE_FACTOR
                    pred_depth = F.interpolate(pred_depth, [gt_height, gt_width], mode='bilinear', align_corners=True)

                    figname = "{}_{}.png".format(inputs['tag'][0].split(' ')[0].split('/')[1], inputs['tag'][0].split(' ')[1])
                    pred_depthnp = (pred_depth[0,0,:,:].cpu().numpy() * 256.0).astype(np.uint16)
                    pil.fromarray(pred_depthnp).save(os.path.join(vlsroot, figname))
Example #23
def prepare_model_for_test(opt):
    opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder)
    print("-> Loading weights from {}".format(opt.load_weights_folder))
    encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth")
    decoder_path = os.path.join(opt.load_weights_folder, "depth.pth")
    encoder_dict = torch.load(encoder_path)
    decoder_dict = torch.load(decoder_path)

    encoder = networks.ResnetEncoder(opt.num_layers, False)
    depth_decoder = networks.DepthDecoder(encoder.num_ch_enc, 
            scales=range(1), 
            upsample_mode='bilinear'
    )

    encoder.load_state_dict({k: v for k, v in encoder_dict.items() if k in encoder.state_dict()})
    depth_decoder.load_state_dict(decoder_dict)
    
    encoder.cuda().eval()
    depth_decoder.cuda().eval()
    
    return encoder, depth_decoder, encoder_dict['height'], encoder_dict['width']
Example #24
def init_model(model_name='mono+stereo_640x192'):

    if not model_dict['initialized']:
        download_model_if_doesnt_exist(model_name)
        model_path = os.path.join(
            f"{os.path.dirname(os.path.realpath(__file__))}/models",
            model_name)
        encoder_path = os.path.join(model_path, "encoder.pth")
        depth_decoder_path = os.path.join(model_path, "depth.pth")

        # LOADING PRETRAINED MODEL
        encoder = networks.ResnetEncoder(18, False)
        loaded_dict_enc = torch.load(encoder_path, map_location=device)

        filtered_dict_enc = {
            k: v
            for k, v in loaded_dict_enc.items() if k in encoder.state_dict()
        }
        encoder.load_state_dict(filtered_dict_enc)

        encoder.to(device)
        encoder.eval()

        depth_decoder = networks.DepthDecoder(num_ch_enc=encoder.num_ch_enc,
                                              scales=range(4))

        loaded_dict = torch.load(depth_decoder_path, map_location=device)
        depth_decoder.load_state_dict(loaded_dict)

        depth_decoder.to(device)
        depth_decoder.eval()

        # extract the height and width of image that this model was trained with
        model_dict['feed_width'] = loaded_dict_enc['width']
        model_dict['feed_height'] = loaded_dict_enc['height']

        model_dict['encoder'] = encoder
        model_dict['decoder'] = depth_decoder

        model_dict['initialized'] = True
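
Usage sketch: after init_model(), the cached networks and feed resolution can be read back from model_dict.

init_model()
encoder = model_dict['encoder']
depth_decoder = model_dict['decoder']
feed_width, feed_height = model_dict['feed_width'], model_dict['feed_height']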
Example #25
def setup_network(model_name="mono_640x192"):
    download_model_if_doesnt_exist(model_name)
    encoder_path = os.path.join("models", model_name, "encoder.pth")
    depth_decoder_path = os.path.join("models", model_name, "depth.pth")

    # LOADING PRETRAINED MODEL
    encoder = networks.ResnetEncoder(18, False)
    depth_decoder = networks.DepthDecoder(num_ch_enc=encoder.num_ch_enc,
                                          scales=range(4))
    loaded_dict_enc = torch.load(encoder_path, map_location='cpu')
    filtered_dict_enc = {
        k: v
        for k, v in loaded_dict_enc.items() if k in encoder.state_dict()
    }
    encoder.load_state_dict(filtered_dict_enc)
    loaded_dict = torch.load(depth_decoder_path, map_location='cpu')
    depth_decoder.load_state_dict(loaded_dict)

    encoder.eval()
    depth_decoder.eval()

    return encoder, depth_decoder, loaded_dict_enc
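
The encoder checkpoint dictionary is returned so callers can recover the training resolution, e.g.:

encoder, depth_decoder, loaded_dict_enc = setup_network()
feed_height = loaded_dict_enc['height']
feed_width = loaded_dict_enc['width']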
Example #26
def load_model():

    model_name = 'mono+stereo_640x192'

    device = torch.device("cuda")

    model_path = os.path.join("models", model_name)
    print("-> Loading model from ", model_path)
    encoder_path = os.path.join(model_path, "encoder.pth")
    depth_decoder_path = os.path.join(model_path, "depth.pth")

    # LOADING PRETRAINED MODEL
    print("   Loading pretrained encoder")
    encoder = networks.ResnetEncoder(18, False)
    loaded_dict_enc = torch.load(encoder_path, map_location=device)

    # extract the height and width of image that this model was trained with
    feed_height = loaded_dict_enc['height']
    feed_width = loaded_dict_enc['width']
    filtered_dict_enc = {
        k: v
        for k, v in loaded_dict_enc.items() if k in encoder.state_dict()
    }
    encoder.load_state_dict(filtered_dict_enc)
    encoder.to(device)
    encoder.eval()

    print("   Loading pretrained decoder")
    depth_decoder = networks.DepthDecoder(num_ch_enc=encoder.num_ch_enc,
                                          scales=range(4))

    loaded_dict = torch.load(depth_decoder_path, map_location=device)
    depth_decoder.load_state_dict(loaded_dict)

    depth_decoder.to(device)
    depth_decoder.eval()

    return feed_width, feed_height, encoder, depth_decoder, device
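
Usage sketch combining load_model() with the PIL preprocessing used in the other examples (the image path is illustrative; pil is PIL.Image):

feed_width, feed_height, encoder, depth_decoder, device = load_model()
img = pil.open("test.jpg").convert('RGB')
x = transforms.ToTensor()(
    img.resize((feed_width, feed_height), pil.LANCZOS)).unsqueeze(0)
with torch.no_grad():
    disp = depth_decoder(encoder(x.to(device)))[("disp", 0)]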
Example #27
def prepare_model_for_test(args, device):
    model_path = args.model_name
    print("-> Loading model from ", model_path)
    model_path = os.path.join("ckpts", model_path)
    encoder_path = os.path.join(model_path, "encoder.pth")
    decoder_path = os.path.join(model_path, "depth.pth")
    encoder_dict = torch.load(encoder_path, map_location=device)
    decoder_dict = torch.load(decoder_path, map_location=device)

    encoder = networks.ResnetEncoder(18, False)
    decoder = networks.DepthDecoder(
        num_ch_enc=encoder.num_ch_enc,
        scales=range(1),
    )

    encoder.load_state_dict(
        {k: v
         for k, v in encoder_dict.items() if k in encoder.state_dict()})
    decoder.load_state_dict(decoder_dict)

    encoder = encoder.to(device).eval()
    decoder = decoder.to(device).eval()

    return encoder, decoder, encoder_dict['height'], encoder_dict['width']
Example #28
    def __init__(self, options):
        self.opt = options

        # checking height and width are multiples of 32
        assert self.opt.height % 32 == 0, "'height' must be a multiple of 32"
        assert self.opt.width % 32 == 0, "'width' must be a multiple of 32"

        self.models = {}
        self.parameters_to_train = []

        self.device = "cuda"

        self.models["encoder"] = networks.ResnetEncoder(self.opt.num_layers,
                                                        pretrained=True)
        self.models["encoder"].to(self.device)
        self.parameters_to_train += list(self.models["encoder"].parameters())
        self.models["depth"] = DepthDecoder(self.models["encoder"].num_ch_enc,
                                            num_output_channels=1)
        self.models["depth"].to(self.device)

        self.set_dataset()

        self.depth_metric_names = [
            "de/abs_rel", "de/sq_rel", "de/rms", "de/log_rms", "da/a1",
            "da/a2", "da/a3"
        ]

        print("Using split:\t  ", self.opt.split)

        if self.opt.load_weights_folder is not None:
            self.load_model()

        self.MIN_DEPTH = 1e-3
        self.MAX_DEPTH = 80

        self.STEREO_SCALE_FACTOR = 5.4
Example #29
def load_model(model_name):
    # Set up network and load weights
    if model_name.startswith('office_trim'):
        models_path = abspath('./logs/office')
    else:
        models_path = abspath('./logs')
    weights_path = join(models_path, model_name, 'models',
                        'weights_{}'.format(epoch_num))

    # Load pretrained model
    print('Loading... \nMODEL {}'.format(model_name))
    encoder = networks.ResnetEncoder(18, False)
    depth_decoder = networks.DepthDecoder(num_ch_enc=encoder.num_ch_enc,
                                          scales=range(num_scales))
    encoder_path = join(weights_path, 'encoder.pth')
    depth_decoder_path = join(weights_path, 'depth.pth')

    # Load encoder network with weights. Verify encoder architecture
    loaded_dict_enc = torch.load(encoder_path)
    filtered_dict_enc = {
        k: v
        for k, v in loaded_dict_enc.items() if k in encoder.state_dict()
    }
    encoder.load_state_dict(filtered_dict_enc)

    # Load depth decoder network with weights
    loaded_dict = torch.load(depth_decoder_path)
    depth_decoder.load_state_dict(loaded_dict)

    # Set to eval mode on GPU
    encoder.cuda()
    depth_decoder.cuda()
    encoder.eval()
    depth_decoder.eval()

    return depth_decoder, encoder
Example #30
def evaluate(opt):
    """Evaluate odometry on the KITTI dataset
    """
    assert os.path.isdir(opt.load_weights_folder), \
        "Cannot find a folder at {}".format(opt.load_weights_folder)

    assert opt.eval_split == "odom_9" or opt.eval_split == "odom_10", \
        "eval_split should be either odom_9 or odom_10"

    sequence_id = int(opt.eval_split.split("_")[1])

    filenames = readlines(
        os.path.join(os.path.dirname(__file__), "splits", "odom",
                     "test_files_{:02d}.txt".format(sequence_id)))

    dataset = KITTIOdomDataset(opt.data_path, filenames, opt.height, opt.width,
                               [0, 1], 4, is_train=False)
    dataloader = DataLoader(dataset, opt.batch_size, shuffle=False,
                            num_workers=opt.num_workers, pin_memory=True, drop_last=False)

    pose_encoder_path = os.path.join(opt.load_weights_folder, "pose_encoder.pth")
    pose_decoder_path = os.path.join(opt.load_weights_folder, "pose.pth")

    pose_encoder = networks.ResnetEncoder(opt.num_layers, False, 2)
    pose_encoder.load_state_dict(torch.load(pose_encoder_path))

    pose_decoder = networks.PoseDecoder(pose_encoder.num_ch_enc, 1, 2)
    pose_decoder.load_state_dict(torch.load(pose_decoder_path))

    pose_encoder.cuda()
    pose_encoder.eval()
    pose_decoder.cuda()
    pose_decoder.eval()

    pred_poses = []

    print("-> Computing pose predictions")

    opt.frame_ids = [0, 1]  # pose network only takes two frames as input

    with torch.no_grad():
        for inputs in dataloader:
            for key, ipt in inputs.items():
                inputs[key] = ipt.cuda()

            all_color_aug = torch.cat([inputs[("color_aug", i, 0)] for i in opt.frame_ids], 1)

            features = [pose_encoder(all_color_aug)]
            axisangle, translation = pose_decoder(features)

            pred_poses.append(
                transformation_from_parameters(axisangle[:, 0], translation[:, 0]).cpu().numpy())

    pred_poses = np.concatenate(pred_poses)

    gt_poses_path = os.path.join(opt.data_path, "poses", "{:02d}.txt".format(sequence_id))
    gt_global_poses = np.loadtxt(gt_poses_path).reshape(-1, 3, 4)
    gt_global_poses = np.concatenate(
        (gt_global_poses, np.zeros((gt_global_poses.shape[0], 1, 4))), 1)
    gt_global_poses[:, 3, 3] = 1
    gt_xyzs = gt_global_poses[:, :3, 3]

    gt_local_poses = []
    for i in range(1, len(gt_global_poses)):
        gt_local_poses.append(
            np.linalg.inv(np.dot(np.linalg.inv(gt_global_poses[i - 1]), gt_global_poses[i])))

    ates = []
    num_frames = gt_xyzs.shape[0]
    track_length = 5
    for i in range(0, num_frames - 1):
        local_xyzs = np.array(dump_xyz(pred_poses[i:i + track_length - 1]))
        gt_local_xyzs = np.array(dump_xyz(gt_local_poses[i:i + track_length - 1]))

        ates.append(compute_ate(gt_local_xyzs, local_xyzs))

    print("\n   Trajectory error: {:0.3f}, std: {:0.3f}\n".format(np.mean(ates), np.std(ates)))

    save_path = os.path.join(opt.load_weights_folder, "poses.npy")
    np.save(save_path, pred_poses)
    print("-> Predictions saved to", save_path)