Example #1
    def __init__(self, options):
        self.opt = options
        self.log_path = os.path.join(self.opt.log_dir, self.opt.model_name)

        # checking height and width are multiples of 32
        assert self.opt.height % 32 == 0, "'height' must be a multiple of 32"
        assert self.opt.width % 32 == 0, "'width' must be a multiple of 32"

        self.models = {}
        self.parameters_to_train = []

        self.device = torch.device("cpu" if self.opt.no_cuda else "cuda")

        self.num_scales = len(self.opt.scales)
        self.num_input_frames = len(self.opt.frame_ids)
        self.num_pose_frames = 2 if self.opt.pose_model_input == "pairs" else self.num_input_frames

        assert self.opt.frame_ids[0] == 0, "frame_ids must start with 0"

        self.use_pose_net = not (self.opt.use_stereo
                                 and self.opt.frame_ids == [0])

        if self.opt.use_stereo:
            self.opt.frame_ids.append("s")

        self.models["encoder"] = networks.ResnetEncoder(
            self.opt.num_layers, self.opt.weights_init == "pretrained")
        self.models["encoder"].to(self.device)
        self.parameters_to_train += list(self.models["encoder"].parameters())

        self.models["depth"] = networks.DepthDecoder(
            self.models["encoder"].num_ch_enc, self.opt.scales)
        self.models["depth"].to(self.device)
        self.parameters_to_train += list(self.models["depth"].parameters())

        if self.use_pose_net:
            if self.opt.pose_model_type == "separate_resnet":
                self.models["pose_encoder"] = networks.ResnetEncoder(
                    self.opt.num_layers,
                    self.opt.weights_init == "pretrained",
                    num_input_images=self.num_pose_frames)

                self.models["pose_encoder"].to(self.device)
                self.parameters_to_train += list(
                    self.models["pose_encoder"].parameters())

                self.models["pose"] = networks.PoseDecoder(
                    self.models["pose_encoder"].num_ch_enc,
                    num_input_features=1,
                    num_frames_to_predict_for=2)

            elif self.opt.pose_model_type == "shared":
                self.models["pose"] = networks.PoseDecoder(
                    self.models["encoder"].num_ch_enc, self.num_pose_frames)

            elif self.opt.pose_model_type == "posecnn":
                self.models["pose"] = networks.PoseCNN(
                    self.num_input_frames if self.opt.pose_model_input ==
                    "all" else 2)

            self.models["pose"].to(self.device)
            self.parameters_to_train += list(self.models["pose"].parameters())

        if self.opt.predictive_mask:
            assert self.opt.disable_automasking, \
                "When using predictive_mask, please disable automasking with --disable_automasking"

            # Our implementation of the predictive masking baseline has the same architecture
            # as our depth decoder. We predict a separate mask for each source frame.
            self.models["predictive_mask"] = networks.DepthDecoder(
                self.models["encoder"].num_ch_enc,
                self.opt.scales,
                num_output_channels=(len(self.opt.frame_ids) - 1))
            self.models["predictive_mask"].to(self.device)
            self.parameters_to_train += list(
                self.models["predictive_mask"].parameters())

        self.model_optimizer = optim.Adam(self.parameters_to_train,
                                          self.opt.learning_rate)
        self.model_lr_scheduler = optim.lr_scheduler.StepLR(
            self.model_optimizer, self.opt.scheduler_step_size, 0.1)

        if self.opt.load_weights_folder is not None:
            self.load_model()

        print("Training model named:\n  ", self.opt.model_name)
        print("Models and tensorboard events files are saved to:\n  ",
              self.opt.log_dir)
        print("Training is using:\n  ", self.device)

        # data
        self.dataset = datasets.InteriorDataset

        fpath = os.path.join(os.path.dirname(__file__), "splits",
                             self.opt.split, "{}_files.txt")

        train_filenames = readlines(fpath.format("train"))
        val_filenames = readlines(fpath.format("val"))
        img_ext = '.png'

        num_train_samples = len(train_filenames)
        self.num_total_steps = num_train_samples // self.opt.batch_size * self.opt.num_epochs

        train_dataset = self.dataset(self.opt.data_path,
                                     train_filenames,
                                     self.opt.height,
                                     self.opt.width,
                                     self.opt.frame_ids,
                                     4,  # num_scales
                                     is_train=True,
                                     img_ext=img_ext)
        self.train_loader = DataLoader(train_dataset,
                                       self.opt.batch_size,
                                       shuffle=True,
                                       num_workers=self.opt.num_workers,
                                       pin_memory=True,
                                       drop_last=True)
        val_dataset = self.dataset(self.opt.data_path,
                                   val_filenames,
                                   self.opt.height,
                                   self.opt.width,
                                   self.opt.frame_ids,
                                   4,  # num_scales
                                   is_train=False,
                                   img_ext=img_ext)
        self.val_loader = DataLoader(val_dataset,
                                     self.opt.batch_size,
                                     shuffle=True,
                                     num_workers=self.opt.num_workers,
                                     pin_memory=True,
                                     drop_last=True)
        self.val_iter = iter(self.val_loader)

        self.writers = {}
        for mode in ["train", "val"]:
            self.writers[mode] = SummaryWriter(
                os.path.join(self.log_path, mode))

        if not self.opt.no_ssim:
            self.ssim = SSIM()
            self.ssim.to(self.device)

        self.backproject_depth = {}
        self.project_3d = {}
        for scale in self.opt.scales:
            h = self.opt.height // (2**scale)
            w = self.opt.width // (2**scale)

            self.backproject_depth[scale] = BackprojectDepth(
                self.opt.batch_size, h, w)
            self.backproject_depth[scale].to(self.device)

            self.project_3d[scale] = Project3D(self.opt.batch_size, h, w)
            self.project_3d[scale].to(self.device)

        self.depth_metric_names = [
            "de/abs_rel", "de/sq_rel", "de/rms", "de/log_rms", "da/a1",
            "da/a2", "da/a3"
        ]

        print("Using split:\n  ", self.opt.split)
        print(
            "There are {:d} training items and {:d} validation items\n".format(
                len(train_dataset), len(val_dataset)))

        self.save_opts()
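
The constructor above only ever reads attributes of a single options object. Below is a minimal sketch of such an object, assuming monodepth2-style option names: every field is one that Example #1 actually reads, but the concrete values are illustrative assumptions, not the project's defaults.

from argparse import Namespace

opts = Namespace(
    log_dir="./logs", model_name="mono_model",
    height=192, width=640,                  # must both be multiples of 32
    scales=[0, 1, 2, 3], frame_ids=[0, -1, 1],
    pose_model_input="pairs", pose_model_type="separate_resnet",
    use_stereo=False, num_layers=18, weights_init="pretrained",
    predictive_mask=False, disable_automasking=False,
    learning_rate=1e-4, scheduler_step_size=15,
    load_weights_folder=None, split="my_split",
    data_path="/path/to/data", batch_size=12, num_epochs=20,
    num_workers=4, no_ssim=False, no_cuda=False,
)
# trainer = Trainer(opts)  # assuming the enclosing class is named Trainer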
Example #2
    def __init__(self, options):
        self.opt = options
        self.log_path = os.path.join(self.opt.log_dir, self.opt.model_name)

        # checking height and width are multiples of 32
        assert self.opt.height % 32 == 0, "'height' must be a multiple of 32"
        assert self.opt.width % 32 == 0, "'width' must be a multiple of 32"

        self.models = {}
        self.parameters_to_train = []

        self.device = torch.device("cpu" if self.opt.no_cuda else "cuda")

        self.num_scales = len(self.opt.scales)
        self.num_input_frames = len(self.opt.frame_ids)
        self.num_pose_frames = 2 if self.opt.pose_model_input == "pairs" else self.num_input_frames

        assert self.opt.frame_ids[0] == 0, "frame_ids must start with 0"

        self.use_pose_net = not (self.opt.use_stereo and self.opt.frame_ids == [0])

        if self.opt.use_stereo:
            self.opt.frame_ids.append("s")

        self.models["encoder"] = networks.ResnetEncoder(
            self.opt.num_layers, self.opt.weights_init == "pretrained")
        self.models["encoder"].to(self.device)
        self.parameters_to_train += list(self.models["encoder"].parameters())

        self.models["depth"] = networks.DepthDecoder(
            self.models["encoder"].num_ch_enc, self.opt.scales)
        self.models["depth"].to(self.device)
        self.parameters_to_train += list(self.models["depth"].parameters())

        if self.use_pose_net:
            if self.opt.pose_model_type == "separate_resnet":
                self.models["pose_encoder"] = networks.ResnetEncoder(
                    self.opt.num_layers,
                    self.opt.weights_init == "pretrained",
                    num_input_images=self.num_pose_frames)

                self.models["pose_encoder"].to(self.device)
                self.parameters_to_train += list(self.models["pose_encoder"].parameters())

                self.models["pose"] = networks.PoseDecoder(
                    self.models["pose_encoder"].num_ch_enc,
                    num_input_features=1,
                    num_frames_to_predict_for=2)

            elif self.opt.pose_model_type == "shared":
                self.models["pose"] = networks.PoseDecoder(
                    self.models["encoder"].num_ch_enc, self.num_pose_frames)

            elif self.opt.pose_model_type == "posecnn":
                self.models["pose"] = networks.PoseCNN(
                    self.num_input_frames if self.opt.pose_model_input == "all" else 2)

            self.models["pose"].to(self.device)
            self.parameters_to_train += list(self.models["pose"].parameters())

        if self.opt.predictive_mask:
            # Our implementation of the predictive masking baseline has the same architecture
            # as our depth decoder. We predict a separate mask for each source frame.
            self.models["predictive_mask"] = networks.DepthDecoder(
                self.models["encoder"].num_ch_enc, self.opt.scales,
                num_output_channels=(len(self.opt.frame_ids) - 1))
            self.models["predictive_mask"].to(self.device)
            self.parameters_to_train += list(self.models["predictive_mask"].parameters())

        self.model_optimizer = optim.Adam(self.parameters_to_train, self.opt.learning_rate)
        self.model_lr_scheduler = optim.lr_scheduler.StepLR(
            self.model_optimizer, self.opt.scheduler_step_size, 0.1)

        if self.opt.load_weights_folder is not None:
            self.load_model()

        print("Training model named:\n  ", self.opt.model_name)
        print("Models and tensorboard events files are saved to:\n  ", self.opt.log_dir)
        print("Training is using:\n  ", self.device)

        # dataset options
        datasets_dict = {'kitti': KITTIRAWDataset,
                         'kitti_odom': KITTIOdomDataset,
                         'FLIR': FlirDataset,
                         'KAIST': KAIST_Dataset,
                         'CREOL': CreolDataset,
                         'all_thermal_data': [FlirDataset, KAIST_Dataset, CreolDataset]}

        assert self.opt.img_ext in ('.png', '.jpg', '.jpeg'), \
            "Please provide a correct image extension"

        img_ext = self.opt.img_ext

        self.dataset = datasets_dict[self.opt.dataset]

        if self.opt.dataset != 'all_thermal_data':
            train_filenames, val_filenames, thermal = get_filenames(self.opt.dataset, self.opt.data_path, self.opt.split)

            num_train_samples = len(train_filenames)
            num_val_samples = len(val_filenames)
            self.num_total_steps = num_train_samples // self.opt.batch_size * self.opt.num_epochs

            train_dataset = self.dataset(
                self.opt.data_path, train_filenames, self.opt.height, self.opt.width,
                self.opt.frame_ids, 4, is_train=True, img_ext=img_ext, thermal=thermal)

            self.train_loader = DataLoader(
                train_dataset, self.opt.batch_size, shuffle=True,
                num_workers=self.opt.num_workers, pin_memory=True, drop_last=True)

            val_dataset = self.dataset(
                self.opt.data_path, val_filenames, self.opt.height, self.opt.width,
                self.opt.frame_ids, 4, is_train=False, img_ext=img_ext, thermal=thermal)

            self.val_loader = DataLoader(
                val_dataset, self.opt.batch_size, shuffle=True,
                num_workers=self.opt.num_workers, pin_memory=True, drop_last=True)

            self.val_iter = iter(self.val_loader)
        else:
            datasets = ['FLIR', 'KAIST', 'CREOL']
            data_paths = ['/groups/mshah/data/FLIR/pre_dat/', '/groups/mshah/data/KAIST_multispectral/', '../robert_video/']

            train_datasets = []
            val_datasets = []

            num_train_samples = 0
            num_val_samples = 0

            for i, dataset in enumerate(self.dataset):
                train_filenames, val_filenames, thermal = get_filenames(datasets[i], data_paths[i], self.opt.split)

                print(datasets[i] + ' train: ' + data_paths[i] + ' - ' + str(len(train_filenames)))
                print(datasets[i] + ' val: ' + data_paths[i] + ' - ' + str(len(val_filenames)))

                num_train_samples += len(train_filenames)
                num_val_samples += len(val_filenames)

                train_datasets.append(dataset(
                    data_paths[i], train_filenames, self.opt.height, self.opt.width,
                    self.opt.frame_ids, 4, is_train=True, img_ext=img_ext, thermal=thermal))

                val_datasets.append(dataset(
                    data_paths[i], val_filenames, self.opt.height, self.opt.width,
                    self.opt.frame_ids, 4, is_train=False, img_ext=img_ext, thermal=thermal))

            self.num_total_steps = num_train_samples // self.opt.batch_size * self.opt.num_epochs

            self.train_loader = DataLoader(
                ConcatDataset(train_datasets), self.opt.batch_size, shuffle=True,
                num_workers=self.opt.num_workers, pin_memory=True, drop_last=True)

            self.val_loader = DataLoader(
                ConcatDataset(val_datasets), self.opt.batch_size, shuffle=True,
                num_workers=self.opt.num_workers, pin_memory=True, drop_last=True)

            self.val_iter = iter(self.val_loader)

        # self.writers = {}
        # for mode in ["train", "val"]:
        #     self.writers[mode] = SummaryWriter(os.path.join(self.log_path, mode))

        if not self.opt.no_ssim:
            self.ssim = SSIM()
            self.ssim.to(self.device)

        self.backproject_depth = {}
        self.project_3d = {}
        for scale in self.opt.scales:
            h = self.opt.height // (2 ** scale)
            w = self.opt.width // (2 ** scale)

            self.backproject_depth[scale] = BackprojectDepth(self.opt.batch_size, h, w)
            self.backproject_depth[scale].to(self.device)

            self.project_3d[scale] = Project3D(self.opt.batch_size, h, w)
            self.project_3d[scale].to(self.device)

        self.depth_metric_names = [
            "de/abs_rel", "de/sq_rel", "de/rms", "de/log_rms", "da/a1", "da/a2", "da/a3"]

        if self.opt.dataset.startswith('kitti'):
            print("Using split:\n  ", self.opt.split)
        else:
            print("Using dataset:\n  ", self.opt.dataset)
        
        print("There are {:d} training items and {:d} validation items\n".format(
            num_train_samples, num_val_samples))

        self.save_opts()
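
Example #2 delegates split construction to a helper get_filenames that is not shown here; the call sites only constrain its signature and return value. A hypothetical stub matching that contract, for reference:

def get_filenames(dataset_name, data_path, split):
    """Hypothetical stub: takes (dataset_name, data_path, split) and returns
    train/val filename lists plus a flag marking thermal datasets, as the
    call sites in Example #2 expect."""
    train_filenames, val_filenames = [], []
    thermal = dataset_name in ('FLIR', 'KAIST', 'CREOL')
    # ... populate the lists from the split files under data_path ...
    return train_filenames, val_filenames, thermal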
Example #3
    def __init__(self, options):
        self.opt = options
        self.refine = options.refine or options.inv_refine
        self.log_path = os.path.join(self.opt.log_dir, self.opt.model_name)
        self.crop_mode = options.crop_mode

        # checking height and width are multiples of 32
        assert self.opt.height % 32 == 0, "'height' must be a multiple of 32"
        assert self.opt.width % 32 == 0, "'width' must be a multiple of 32"

        self.models = {}
        self.parameters_to_train = []
        self.parameters_to_train_refine = []

        self.device = torch.device("cpu" if self.opt.no_cuda else "cuda")

        self.num_scales = len(self.opt.scales)
        self.num_input_frames = len(self.opt.frame_ids)
        self.num_pose_frames = 2 if self.opt.pose_model_input == "pairs" else self.num_input_frames

        assert self.opt.frame_ids[0] == 0, "frame_ids must start with 0"

        self.use_pose_net = not (self.opt.use_stereo
                                 and self.opt.frame_ids == [0])

        if self.opt.use_stereo:
            self.opt.frame_ids.append("s")
        if self.refine:
            self.refine_stage = list(range(options.refine_stage))
            if len(self.refine_stage) > 4:
                self.crop_h = [96, 128, 160, 192, 192]
                self.crop_w = [192, 256, 384, 448, 640]
            else:
                self.crop_h = [96, 128, 160, 192]
                self.crop_w = [192, 256, 384, 640]
            if self.opt.refine_model == 's':
                self.models["mid_refine"] = networks.Simple_Propagate(
                    self.crop_h, self.crop_w, self.crop_mode)
            elif self.opt.refine_model == 'i':
                self.models["mid_refine"] = networks.Iterative_Propagate_old(
                    self.crop_h, self.crop_w, self.crop_mode)
            for param in self.models["mid_refine"].parameters():
                param.requeires_grad = False
            self.models["mid_refine"].to(self.device)
        self.models["encoder"] = networks.ResnetEncoder(
            self.opt.num_layers,
            self.opt.weights_init == "pretrained",
            num_input_images=1)
        self.models["encoder"].to(self.device)
        self.parameters_to_train += list(self.models["encoder"].parameters())

        self.models["depth"] = networks.DepthDecoder(
            self.models["encoder"].num_ch_enc,
            self.opt.scales,
            refine=self.refine)
        self.models["depth"].to(self.device)
        self.parameters_to_train += list(self.models["depth"].parameters())

        if self.use_pose_net:
            if self.opt.pose_model_type == "separate_resnet":
                self.models["pose_encoder"] = networks.ResnetEncoder(
                    self.opt.num_layers,
                    self.opt.weights_init == "pretrained",
                    num_input_images=self.num_pose_frames)

                self.models["pose_encoder"].to(self.device)
                self.parameters_to_train += list(
                    self.models["pose_encoder"].parameters())

                self.models["pose"] = networks.PoseDecoder(
                    self.models["pose_encoder"].num_ch_enc,
                    num_input_features=1,
                    num_frames_to_predict_for=2)

            elif self.opt.pose_model_type == "shared":
                self.models["pose"] = networks.PoseDecoder(
                    self.models["encoder"].num_ch_enc, self.num_pose_frames)

            elif self.opt.pose_model_type == "posecnn":
                self.models["pose"] = networks.PoseCNN(
                    self.num_input_frames if self.opt.pose_model_input ==
                    "all" else 2)

            self.models["pose"].to(self.device)
            self.parameters_to_train += list(self.models["pose"].parameters())

        self.model_optimizer = optim.Adam(self.parameters_to_train,
                                          self.opt.learning_rate)
        self.model_lr_scheduler = optim.lr_scheduler.StepLR(
            self.model_optimizer, self.opt.scheduler_step_size, 0.1)

        if self.opt.load_weights_folder is not None:
            self.load_model()
        if self.refine:
            self.models["encoder_nograd"] = copy.deepcopy(
                self.models["encoder"])
            for param in self.models["encoder_nograd"].parameters():
                param.requeires_grad = False
            self.models["encoder_nograd"].to(self.device)
            self.models["depth_nograd"] = copy.deepcopy(self.models["depth"])
            for param in self.models["depth_nograd"].parameters():
                param.requeires_grad = False
            self.models["depth_nograd"].to(self.device)

        print("Training model named:\n  ", self.opt.model_name)
        print("Models and tensorboard events files are saved to:\n  ",
              self.opt.log_dir)
        print("Training is using:\n  ", self.device)

        # data
        datasets_dict = {
            "kitti": datasets.KITTIRAWDataset,
            "kitti_odom": datasets.KITTIOdomDataset,
            "kitti_depth": datasets.KITTIDepthDataset
        }
        self.dataset = datasets_dict[self.opt.dataset]

        fpath = os.path.join(os.path.dirname(__file__), "splits",
                             self.opt.split, "{}_files_p.txt")
        train_filenames = readlines(fpath.format("train"))
        val_filenames = readlines(fpath.format("val"))
        img_ext = '.png' if self.opt.png else '.jpg'

        num_train_samples = len(train_filenames)
        self.num_total_steps = num_train_samples // self.opt.batch_size * self.opt.num_epochs

        train_dataset = self.dataset(self.opt.data_path,
                                     train_filenames,
                                     self.opt.height,
                                     self.opt.width,
                                     self.opt.frame_ids,
                                     4,
                                     is_train=True,
                                     img_ext=img_ext,
                                     refine=False,
                                     crop_mode=self.crop_mode)
        self.train_loader = DataLoader(train_dataset,
                                       self.opt.batch_size,
                                       shuffle=True,
                                       num_workers=self.opt.num_workers,
                                       pin_memory=True,
                                       drop_last=True)
        val_dataset = self.dataset(self.opt.data_path,
                                   val_filenames,
                                   self.opt.height,
                                   self.opt.width,
                                   self.opt.frame_ids,
                                   4,
                                   is_train=False,
                                   img_ext=img_ext,
                                   refine=False,
                                   crop_mode=self.crop_mode)
        self.val_loader = DataLoader(val_dataset,
                                     self.opt.batch_size,
                                     shuffle=True,
                                     num_workers=self.opt.num_workers,
                                     pin_memory=True,
                                     drop_last=True)
        self.val_iter = iter(self.val_loader)

        self.writers = {}
        for mode in ["train", "val"]:
            self.writers[mode] = SummaryWriter(
                os.path.join(self.log_path, mode))

        if not self.opt.no_ssim:
            self.ssim = SSIM()
            self.ssim.to(self.device)

        self.backproject_depth = {}
        self.project_3d = {}
        for scale in self.opt.scales:
            h = self.opt.height // (2**scale)
            w = self.opt.width // (2**scale)

            self.backproject_depth[scale] = BackprojectDepth(
                self.opt.batch_size, h, w)
            self.backproject_depth[scale].to(self.device)

            self.project_3d[scale] = Project3D(self.opt.batch_size, h, w)
            self.project_3d[scale].to(self.device)

        self.depth_metric_names = [
            "de/abs_rel", "de/sq_rel", "de/rms", "de/log_rms", "da/a1",
            "da/a2", "da/a3"
        ]

        print("Using split:\n  ", self.opt.split)
        print(
            "There are {:d} training items and {:d} validation items\n".format(
                len(train_dataset), len(val_dataset)))

        self.save_opts()
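
Example #3 freezes the refinement module and the "nograd" encoder/decoder copies by setting requires_grad = False parameter by parameter. A sketch of an equivalent, typo-proof alternative using the in-place nn.Module.requires_grad_ method (assigning to a misspelled attribute on a Parameter fails silently, whereas a misspelled method call raises AttributeError):

import torch.nn as nn

def freeze(module: nn.Module) -> None:
    # requires_grad_(False) disables gradients for all parameters in place
    module.requires_grad_(False)

# e.g. freeze(self.models["encoder_nograd"]) inside __init__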
Example #4
    def __init__(self, options):
        self.opt = options
        self.log_path = os.path.join(self.opt.log_dir, self.opt.model_name)

        # checking height and width are multiples of 32
        assert self.opt.height % 32 == 0, "'height' must be a multiple of 32"
        assert self.opt.width % 32 == 0, "'width' must be a multiple of 32"

        self.models = {}
        self.toolLayers = {}
        self.parameters_to_train = []

        self.device = torch.device("cpu" if self.opt.no_cuda else "cuda")

        self.num_scales = len(self.opt.scales)
        self.num_input_frames = len(self.opt.frame_ids)
        self.num_pose_frames = 2 if self.opt.pose_model_input == "pairs" else self.num_input_frames

        assert self.opt.frame_ids[0] == 0, "frame_ids must start with 0"

        self.use_pose_net = not (self.opt.use_stereo
                                 and self.opt.frame_ids == [0])

        if self.opt.use_stereo:
            self.opt.frame_ids.append("s")

        self.models["encoder"] = networks.ResnetEncoder(
            self.opt.num_layers, self.opt.weights_init == "pretrained")
        self.models["encoder"].to(self.device)
        self.parameters_to_train += list(self.models["encoder"].parameters())

        self.models["depth"] = networks.DepthDecoder(
            self.models["encoder"].num_ch_enc, self.opt.scales)
        self.models["depth"].to(self.device)
        self.parameters_to_train += list(self.models["depth"].parameters())

        if self.use_pose_net:
            if self.opt.pose_model_type == "separate_resnet":
                self.models["pose_encoder"] = networks.ResnetEncoder(
                    self.opt.num_layers,
                    self.opt.weights_init == "pretrained",
                    num_input_images=self.num_pose_frames)

                self.models["pose_encoder"].to(self.device)
                self.parameters_to_train += list(
                    self.models["pose_encoder"].parameters())

                self.models["pose"] = networks.PoseDecoder(
                    self.models["pose_encoder"].num_ch_enc,
                    num_input_features=1,
                    num_frames_to_predict_for=2)

            elif self.opt.pose_model_type == "shared":
                self.models["pose"] = networks.PoseDecoder(
                    self.models["encoder"].num_ch_enc, self.num_pose_frames)

            elif self.opt.pose_model_type == "posecnn":
                self.models["pose"] = networks.PoseCNN(
                    self.num_input_frames if self.opt.pose_model_input ==
                    "all" else 2)

            self.models["pose"].to(self.device)
            self.parameters_to_train += list(self.models["pose"].parameters())

        if self.opt.predictive_mask:
            # Our implementation of the predictive masking baseline has the same architecture
            # as our depth decoder. We predict a separate mask for each source frame.
            self.models["predictive_mask"] = networks.DepthDecoder(
                self.models["encoder"].num_ch_enc,
                self.opt.scales,
                num_output_channels=(len(self.opt.frame_ids) - 1))
            self.models["predictive_mask"].to(self.device)
            self.parameters_to_train += list(
                self.models["predictive_mask"].parameters())

        # foreground classes (Cityscapes trainIds, consistent with the comments below:
        # pole, traffic light, traffic sign, person, rider, car, truck, bus, train,
        # motorcycle, bicycle)
        self.foregroundType = [5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 18]
        if self.opt.stereo_mask:
            # .to(self.device) keeps these helper layers consistent with --no_cuda
            self.toolLayers['compute_stereo_mask'] = StereoMask().to(self.device)

        if self.opt.typeWiseRegularization:
            self.toolLayers['compsurfnorm'] = ComputeSurfaceNormal(
                height=self.opt.height,
                width=self.opt.width,
                batch_size=self.opt.batch_size).to(self.device)
            self.toolLayers['typeWReg'] = TypeWiseRegularization().to(self.device)
            self.wallType = [2, 3, 4]  # Building, wall, fence
            self.roadType = [0, 1, 9]  # road, sidewalk, terrain
            self.permuType = [5, 7]  # Pole, traffic sign
            self.skyType = 10
            self.chanWinSize = 5

        if self.opt.borderWiseRegularization:
            self.wallType = [2, 3, 4]  # Building, wall, fence
            self.roadType = [0, 1, 9]  # road, sidewalk, terrain
            self.toolLayers['borderWiseReg'] = BorderWiseRegularization(
                batchNum=self.opt.batch_size,
                width=self.opt.width,
                height=self.opt.height).to(self.device)

        self.model_optimizer = optim.Adam(self.parameters_to_train,
                                          self.opt.learning_rate)
        self.model_lr_scheduler = optim.lr_scheduler.StepLR(
            self.model_optimizer, self.opt.scheduler_step_size, 0.1)

        if self.opt.load_weights_folder is not None:
            self.load_model()

        print("Training model named:\n  ", self.opt.model_name)
        print("Models and tensorboard events files are saved to:\n  ",
              self.opt.log_dir)
        print("Training is using:\n  ", self.device)

        # data
        datasets_dict = {
            "kitti": datasets.KITTIRAWDataset,
            "kitti_odom": datasets.KITTIOdomDataset
        }
        self.dataset = datasets_dict[self.opt.dataset]

        fpath = os.path.join(os.path.dirname(__file__), "splits",
                             self.opt.split, "{}_files.txt")

        train_filenames = readlines(fpath.format("train"))
        val_filenames = readlines(fpath.format("val"))
        img_ext = '.png'

        num_train_samples = len(train_filenames)
        self.num_total_steps = num_train_samples // self.opt.batch_size * self.opt.num_epochs

        train_dataset = self.dataset(self.opt.data_path,
                                     train_filenames,
                                     self.opt.height,
                                     self.opt.width,
                                     self.opt.frame_ids,
                                     4,
                                     is_train=True,
                                     img_ext=img_ext)
        self.train_loader = DataLoader(train_dataset,
                                       self.opt.batch_size,
                                       shuffle=True,
                                       num_workers=self.opt.num_workers,
                                       pin_memory=True,
                                       drop_last=True)
        val_dataset = self.dataset(self.opt.data_path,
                                   val_filenames,
                                   self.opt.height,
                                   self.opt.width,
                                   self.opt.frame_ids,
                                   4,
                                   is_train=False,
                                   img_ext=img_ext)
        self.val_loader = DataLoader(val_dataset,
                                     self.opt.batch_size,
                                     shuffle=True,
                                     num_workers=self.opt.num_workers,
                                     pin_memory=True,
                                     drop_last=True)
        self.val_iter = iter(self.val_loader)

        self.writers = {}
        for mode in ["train", "val"]:
            self.writers[mode] = SummaryWriter(
                os.path.join(self.log_path, mode))

        if not self.opt.no_ssim:
            self.ssim = SSIM()
            self.ssim.to(self.device)

        self.backproject_depth = {}
        self.project_3d = {}
        for scale in self.opt.scales:
            h = self.opt.height // (2**scale)
            w = self.opt.width // (2**scale)

            self.backproject_depth[scale] = BackprojectDepth(
                self.opt.batch_size, h, w)
            self.backproject_depth[scale].to(self.device)

            self.project_3d[scale] = Project3D(self.opt.batch_size, h, w)
            self.project_3d[scale].to(self.device)

        self.depth_metric_names = [
            "de/abs_rel", "de/sq_rel", "de/rms", "de/log_rms", "da/a1",
            "da/a2", "da/a3"
        ]

        print("Using split:\n  ", self.opt.split)
        print(
            "There are {:d} training items and {:d} validation items\n".format(
                len(train_dataset), len(val_dataset)))

        self.save_opts()
Example #5
    def __init__(self, options):
        self.opt = options
        self.log_path = os.path.join(self.opt.log_dir, self.opt.model_name)

        # checking height and width are multiples of 32
        # the default size is 640×192
        assert self.opt.height % 32 == 0, "'height' must be a multiple of 32"
        assert self.opt.width % 32 == 0, "'width' must be a multiple of 32"

        self.models = {}
        self.parameters_to_train = []

        self.device = torch.device("cpu" if self.opt.no_cuda else "cuda")

        # "scales used in the loss"
        self.num_scales = len(self.opt.scales)

        # defaults to [0, -1, 1]; the target frame has id 0
        self.num_input_frames = len(self.opt.frame_ids)
        self.num_pose_frames = 2 if self.opt.pose_model_input == "pairs" else self.num_input_frames

        assert self.opt.frame_ids[0] == 0, "frame_ids must start with 0"

        self.use_pose_net = not (self.opt.use_stereo
                                 and self.opt.frame_ids == [0])

        if self.opt.use_stereo:
            self.opt.frame_ids.append("s")

        # self.opt.num_layers is the depth of the ResNet encoder (ResNet-18 by default)
        # the encoder outputs features at 5 scales
        self.models["encoder"] = networks.ResnetEncoder(
            self.opt.num_layers, self.opt.weights_init == "pretrained")
        self.models["encoder"].to(self.device)
        self.parameters_to_train += list(self.models["encoder"].parameters())

        self.models["depth"] = networks.DepthDecoder(
            self.models["encoder"].num_ch_enc, self.opt.scales)
        self.models["depth"].to(self.device)
        self.parameters_to_train += list(self.models["depth"].parameters())

        # Three ways to build the pose network; the table in the paper's Supplementary
        # Material compares them, and separate_resnet works best, so it is the default.
        if self.use_pose_net:
            # does not share parameters with the depth encoder
            # the pose encoder stacks the two input images into 6 channels and outputs one feature set
            # the pose decoder takes one feature set and outputs two poses
            if self.opt.pose_model_type == "separate_resnet":
                self.models["pose_encoder"] = networks.ResnetEncoder(
                    self.opt.num_layers,
                    self.opt.weights_init == "pretrained",
                    num_input_images=self.num_pose_frames)

                self.models["pose_encoder"].to(self.device)
                self.parameters_to_train += list(
                    self.models["pose_encoder"].parameters())

                self.models["pose"] = networks.PoseDecoder(
                    self.models["pose_encoder"].num_ch_enc,
                    num_input_features=1,
                    num_frames_to_predict_for=2)

            # shares parameters with the depth encoder
            # the encoder takes one image at a time (similar to a Siamese network)
            # the decoder takes two feature sets and outputs one pose
            elif self.opt.pose_model_type == "shared":
                self.models["pose"] = networks.PoseDecoder(
                    self.models["encoder"].num_ch_enc, self.num_pose_frames)

            # posecnn is the method from "Learning Depth from Monocular Videos
            # using Direct Methods", https://arxiv.org/pdf/1712.00175.pdf
            elif self.opt.pose_model_type == "posecnn":
                self.models["pose"] = networks.PoseCNN(
                    self.num_input_frames if self.opt.pose_model_input ==
                    "all" else 2)

            self.models["pose"].to(self.device)
            self.parameters_to_train += list(self.models["pose"].parameters())

        # this mask corresponds to the mask in SfMLearner
        if self.opt.predictive_mask:
            assert self.opt.disable_automasking, \
                "When using predictive_mask, please disable automasking with --disable_automasking"

            # Our implementation of the predictive masking baseline has the same architecture
            # as our depth decoder. We predict a separate mask for each source frame.
            self.models["predictive_mask"] = networks.DepthDecoder(
                self.models["encoder"].num_ch_enc,
                self.opt.scales,
                num_output_channels=(len(self.opt.frame_ids) - 1))
            self.models["predictive_mask"].to(self.device)
            self.parameters_to_train += list(
                self.models["predictive_mask"].parameters())

        self.model_optimizer = optim.Adam(self.parameters_to_train,
                                          self.opt.learning_rate)
        self.model_lr_scheduler = optim.lr_scheduler.StepLR(
            self.model_optimizer, self.opt.scheduler_step_size, 0.1)

        if self.opt.load_weights_folder is not None:
            self.load_model()

        print("Training model named:\n  ", self.opt.model_name)
        print("Models and tensorboard events files are saved to:\n  ",
              self.opt.log_dir)
        print("Training is using:\n  ", self.device)

        # data
        datasets_dict = {
            "kitti": datasets.KITTIRAWDataset,
            "kitti_odom": datasets.KITTIOdomDataset
        }
        self.dataset = datasets_dict[self.opt.dataset]

        fpath = os.path.join(os.path.dirname(__file__), "splits",
                             self.opt.split, "{}_files.txt")

        train_filenames = readlines(fpath.format("train"))
        val_filenames = readlines(fpath.format("val"))
        img_ext = '.png' if self.opt.png else '.jpg'

        num_train_samples = len(train_filenames)
        self.num_total_steps = num_train_samples // self.opt.batch_size * self.opt.num_epochs

        train_dataset = self.dataset(self.opt.data_path,
                                     train_filenames,
                                     self.opt.height,
                                     self.opt.width,
                                     self.opt.frame_ids,
                                     4,
                                     is_train=True,
                                     img_ext=img_ext)
        self.train_loader = DataLoader(train_dataset,
                                       self.opt.batch_size,
                                       shuffle=True,
                                       num_workers=self.opt.num_workers,
                                       pin_memory=True,
                                       drop_last=True)
        val_dataset = self.dataset(self.opt.data_path,
                                   val_filenames,
                                   self.opt.height,
                                   self.opt.width,
                                   self.opt.frame_ids,
                                   4,
                                   is_train=False,
                                   img_ext=img_ext)
        self.val_loader = DataLoader(val_dataset,
                                     self.opt.batch_size,
                                     shuffle=True,
                                     num_workers=self.opt.num_workers,
                                     pin_memory=True,
                                     drop_last=True)
        self.val_iter = iter(self.val_loader)

        self.writers = {}
        for mode in ["train", "val"]:
            self.writers[mode] = SummaryWriter(
                os.path.join(self.log_path, mode))

        # if set, disables ssim in the loss
        if not self.opt.no_ssim:
            self.ssim = SSIM()
            self.ssim.to(self.device)

        self.backproject_depth = {}
        self.project_3d = {}
        for scale in self.opt.scales:
            h = self.opt.height // (2**scale)
            w = self.opt.width // (2**scale)

            self.backproject_depth[scale] = BackprojectDepth(
                self.opt.batch_size, h, w)
            self.backproject_depth[scale].to(self.device)

            self.project_3d[scale] = Project3D(self.opt.batch_size, h, w)
            self.project_3d[scale].to(self.device)

        self.depth_metric_names = [
            "de/abs_rel", "de/sq_rel", "de/rms", "de/log_rms", "da/a1",
            "da/a2", "da/a3"
        ]

        print("Using split:\n  ", self.opt.split)
        print(
            "There are {:d} training items and {:d} validation items\n".format(
                len(train_dataset), len(val_dataset)))

        # save options
        self.save_opts()
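
The comments above compare the three pose-network variants selected by pose_model_type. A sketch of how the corresponding options might be declared (the real options file is not part of this snippet; the names simply mirror what the constructor reads):

import argparse

parser = argparse.ArgumentParser()
# assumed option declarations, not the project's actual options.py
parser.add_argument("--pose_model_type",
                    choices=["separate_resnet", "shared", "posecnn"],
                    default="separate_resnet",  # best in the paper's ablation table
                    help="how the pose network is structured")
parser.add_argument("--pose_model_input",
                    choices=["pairs", "all"], default="pairs")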
Example #6
    def __init__(self, options):
        self.opt = options
        self.log_path = os.path.join(self.opt.log_dir, self.opt.model_name)

        # checking height and width are multiples of 32
        assert self.opt.height % 32 == 0, "'height' must be a multiple of 32"
        assert self.opt.width % 32 == 0, "'width' must be a multiple of 32"

        self.models = {}
        self.parameters_to_train = []
        self.depth_parameters_to_train = []
        self.pose_parameters_to_train = []

        self.device = torch.device("cpu" if self.opt.no_cuda else "cuda")

        self.num_scales = len(self.opt.scales)
        self.num_input_frames = len(self.opt.frame_ids)
        self.num_pose_frames = 2 if self.opt.pose_model_input == "pairs" else self.num_input_frames

        assert self.opt.frame_ids[0] == 0, "frame_ids must start with 0"

        self.models["encoder"] = networks.PackResNetEncoder()
        self.models["encoder"].to(self.device)
        self.parameters_to_train += list(self.models["encoder"].parameters())
        self.depth_parameters_to_train += list(
            self.models["encoder"].parameters())

        self.models["depth"] = networks.UnPackDepthDecoder(
            self.models["encoder"].num_ch_enc, self.opt.scales)
        self.models["depth"].to(self.device)
        self.parameters_to_train += list(self.models["depth"].parameters())
        self.depth_parameters_to_train += list(
            self.models["depth"].parameters())

        self.models["pose"] = networks.PoseCNN(
            self.num_input_frames if self.opt.pose_model_input == "all" else 2)

        self.models["pose"].to(self.device)
        self.parameters_to_train += list(self.models["pose"].parameters())
        self.pose_parameters_to_train += list(self.models["pose"].parameters())

        # self.model_optimizer = optim.Adam(self.parameters_to_train, self.opt.depth_learning_rate)
        self.depth_model_optimizer = optim.Adam(self.depth_parameters_to_train,
                                                self.opt.depth_learning_rate)
        self.pose_model_optimizer = optim.Adam(self.pose_parameters_to_train,
                                               self.opt.pose_learning_rate)
        # self.model_optimizer = optim.Adam(self.parameters_to_train, self.opt.learning_rate)
        # self.model_lr_scheduler = optim.lr_scheduler.StepLR(self.model_optimizer, self.opt.scheduler_step_size, 0.5)
        self.depth_model_lr_scheduler = optim.lr_scheduler.StepLR(
            self.depth_model_optimizer, self.opt.scheduler_step_size, 0.1)
        self.pose_model_lr_scheduler = optim.lr_scheduler.StepLR(
            self.pose_model_optimizer, self.opt.scheduler_step_size, 0.1)

        if self.opt.load_weights_folder is not None:
            self.load_model()

        print("Training model named:\n  ", self.opt.model_name)
        print("Models and tensorboard events files are saved to:\n  ",
              self.opt.log_dir)
        print("Training is using:\n  ", self.device)

        # data
        datasets_dict = {
            "kitti": datasets.KITTIRDVTDataset,
            "kitti_odom": datasets.KITTIOdomDataset
        }
        self.dataset = datasets_dict[self.opt.dataset]

        fpath = os.path.join(os.path.dirname(__file__), "splits",
                             self.opt.split, "{}_files.txt")

        train_filenames = readlines(fpath.format("train"))
        val_filenames = readlines(fpath.format("val"))
        img_ext = '.png' if self.opt.png else '.jpg'

        num_train_samples = len(train_filenames)
        self.num_total_steps = num_train_samples // self.opt.batch_size * self.opt.num_epochs

        train_dataset = self.dataset(self.opt.data_path,
                                     train_filenames,
                                     self.opt.height,
                                     self.opt.width,
                                     self.opt.frame_ids,
                                     4,
                                     is_train=True,
                                     img_ext=img_ext)
        self.train_loader = DataLoader(train_dataset,
                                       self.opt.batch_size,
                                       shuffle=True,
                                       num_workers=self.opt.num_workers,
                                       pin_memory=True,
                                       drop_last=True)
        val_dataset = self.dataset(self.opt.data_path,
                                   val_filenames,
                                   self.opt.height,
                                   self.opt.width,
                                   self.opt.frame_ids,
                                   4,
                                   is_train=False,
                                   img_ext=img_ext)
        self.val_loader = DataLoader(val_dataset,
                                     self.opt.batch_size,
                                     shuffle=True,
                                     num_workers=self.opt.num_workers,
                                     pin_memory=True,
                                     drop_last=True)
        self.val_iter = iter(self.val_loader)

        self.writers = {}
        for mode in ["train", "val"]:
            self.writers[mode] = SummaryWriter(
                os.path.join(self.log_path, mode))

        if not self.opt.no_ssim:
            self.ssim = SSIM()
            self.ssim.to(self.device)

        self.backproject_depth = {}
        self.project_3d = {}
        for scale in self.opt.scales:
            h = self.opt.height // (2**scale)
            w = self.opt.width // (2**scale)

            self.backproject_depth[scale] = BackprojectDepth(
                self.opt.batch_size, h, w)
            self.backproject_depth[scale].to(self.device)

            self.project_3d[scale] = Project3D(self.opt.batch_size, h, w)
            self.project_3d[scale].to(self.device)

        self.depth_metric_names = [
            "de/abs_rel", "de/sq_rel", "de/rms", "de/log_rms", "da/a1",
            "da/a2", "da/a3"
        ]

        print("Using split:\n  ", self.opt.split)
        print(
            "There are {:d} training items and {:d} validation items\n".format(
                len(train_dataset), len(val_dataset)))

        self.save_opts()
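
Unlike the other examples, Example #6 gives the depth and pose networks separate Adam optimizers and StepLR schedulers, so a training step has to drive both. A hypothetical helper illustrating that (the actual train loop is not shown in this snippet):

def step_both(depth_optimizer, pose_optimizer, loss):
    """One gradient step through both optimizers on a shared loss."""
    depth_optimizer.zero_grad()
    pose_optimizer.zero_grad()
    loss.backward()
    depth_optimizer.step()
    pose_optimizer.step()

# the two schedulers would then be stepped once per epoch, e.g.
# self.depth_model_lr_scheduler.step(); self.pose_model_lr_scheduler.step()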
Example #7
    def __init__(self, options):

        self.opt = options

        self.debug = self.opt.debug
        print('DEBUG: ', self.debug)

        self.log_path = os.path.join(self.opt.log_dir, self.opt.model_name)

        # checking height and width are multiples of 32
        assert self.opt.height % 32 == 0, "'height' must be a multiple of 32"
        assert self.opt.width % 32 == 0, "'width' must be a multiple of 32"

        self.models = {}
        self.parameters_to_train = []

        self.device = torch.device("cpu" if self.opt.no_cuda else "cuda")

        self.num_scales = len(self.opt.scales)
        self.num_input_frames = len(self.opt.frame_ids)
        self.num_pose_frames = 2 if self.opt.pose_model_input == "pairs" else self.num_input_frames

        assert self.opt.frame_ids[0] == 0, "frame_ids must start with 0"

        self.use_pose_net = True

        self.models["encoder"] = networks.ResnetEncoder(
            self.opt.num_layers, self.opt.weights_init == "pretrained")
        self.models["encoder"].to(self.device)
        self.parameters_to_train += list(self.models["encoder"].parameters())

        self.models["depth"] = networks.DepthDecoder(
            self.models["encoder"].num_ch_enc, self.opt.scales)
        self.models["depth"].to(self.device)
        self.parameters_to_train += list(self.models["depth"].parameters())

        if self.use_pose_net:
            if self.opt.pose_model_type == "separate_resnet":
                self.models["pose_encoder"] = networks.ResnetEncoder(
                    self.opt.num_layers,
                    self.opt.weights_init == "pretrained",
                    num_input_images=self.num_pose_frames)

                self.models["pose_encoder"].to(self.device)
                self.parameters_to_train += list(
                    self.models["pose_encoder"].parameters())

                self.models["pose"] = networks.PoseDecoder(
                    self.models["pose_encoder"].num_ch_enc,
                    num_input_features=1,
                    num_frames_to_predict_for=2)

            elif self.opt.pose_model_type == "shared":
                self.models["pose"] = networks.PoseDecoder(
                    self.models["encoder"].num_ch_enc, self.num_pose_frames)

            elif self.opt.pose_model_type == "posecnn":
                self.models["pose"] = networks.PoseCNN(
                    self.num_input_frames if self.opt.pose_model_input ==
                    "all" else 2)

            self.models["pose"].to(self.device)
            self.parameters_to_train += list(self.models["pose"].parameters())

        self.model_optimizer = optim.Adam(self.parameters_to_train,
                                          self.opt.learning_rate)
        # note: unlike the StepLR used in the other examples, MultiStepLR expects
        # a list of milestone epochs, so self.opt.scheduler_step_size must be a list here
        self.model_lr_scheduler = optim.lr_scheduler.MultiStepLR(
            self.model_optimizer, self.opt.scheduler_step_size, 0.1)

        if self.opt.load_weights_folder is not None:
            self.load_model()

        print("Training model named:\n  ", self.opt.model_name)
        print("Models and tensorboard events files are saved to:\n  ",
              self.opt.log_dir)
        print("Training is using:\n  ", self.device)
        print("Training is using frames: \n  ", self.opt.frame_ids_to_train)

        # data
        datasets_dict = {"nyu": datasets.NYUDataset}
        self.dataset = datasets_dict[self.opt.dataset]

        train_filenames = readlines('./splits/nyu_train_0_10_20_30_40.txt')

        num_train_samples = len(train_filenames)
        self.num_total_steps = num_train_samples // self.opt.batch_size * self.opt.num_epochs

        train_dataset = self.dataset(self.opt.data_path,
                                     train_filenames,
                                     self.opt.height,
                                     self.opt.width,
                                     self.opt.frame_ids,
                                     1,
                                     is_train=True,
                                     segment_path=self.opt.segment_path,
                                     return_segment=True,
                                     # shared_dict is not defined in this snippet; it is
                                     # presumably a module-level cache shared across workers
                                     shared_dict=shared_dict)

        self.train_loader = DataLoader(train_dataset,
                                       self.opt.batch_size,
                                       shuffle=True,
                                       num_workers=self.opt.num_workers,
                                       pin_memory=True,
                                       drop_last=True)

        # validation
        filenames = readlines('./splits/nyu_test.txt')
        # filenames = [filename.replace("/p300/Code/self_depth/monodepth2/nyuv2/nyu_official",
        #                               self.opt.val_path) for filename in filenames]
        val_dataset = datasets.NYUDataset(self.opt.val_path,
                                          filenames,
                                          self.opt.height,
                                          self.opt.width, [0],
                                          1,
                                          is_train=False,
                                          return_segment=False)
        self.val_dataloader = DataLoader(val_dataset,
                                         1,
                                         shuffle=False,
                                         num_workers=2)

        self.writers = {}
        for mode in ["train", "val"]:
            self.writers[mode] = SummaryWriter(
                os.path.join(self.log_path, mode))

        self.ssim_sparse = SSIM_sparse()
        self.ssim_sparse.to(self.device)

        self.backproject_depth = {}
        for scale in self.opt.scales:
            h = self.opt.height // (2**scale)
            w = self.opt.width // (2**scale)

            self.backproject_depth[scale] = BackprojectDepth(
                self.opt.batch_size, h, w)
            self.backproject_depth[scale].to(self.device)

        self.depth_metric_names = [
            "de/abs_rel", "de/sq_rel", "de/rms", "de/log_rms", "da/a1",
            "da/a2", "da/a3"
        ]

        print("Using split:\n  ", self.opt.split)
        print(
            "There are {:d} training items and {:d} validation items\n".format(
                len(train_dataset), len(val_dataset)))

        self.save_opts()
Example #8
File: trainer.py Project: wf-hahaha/SLAM
    def __init__(self, options):
        self.opt = options
        self.log_path = os.path.join(self.opt.log_dir, self.opt.model_name)

        # checking height and width are multiples of 32
        assert self.opt.height % 32 == 0, "'height' must be a multiple of 32"
        assert self.opt.width % 32 == 0, "'width' must be a multiple of 32"

        self.models = {}
        self.parameters_to_train = []

        self.device = torch.device("cpu" if self.opt.no_cuda else "cuda")

        self.num_scales = len(self.opt.scales)
        self.num_input_frames = len(self.opt.frame_ids)
        self.num_pose_frames = 2 if self.opt.pose_model_input == "pairs" else self.num_input_frames

        assert self.opt.frame_ids[0] == 0, "frame_ids must start with 0"

        self.use_pose_net = not (self.opt.use_stereo and self.opt.frame_ids == [0])

        if self.opt.use_stereo:
            self.opt.frame_ids.append("s")

        self.models["encoder"] = networks.ResnetEncoder(
            self.opt.num_layers, self.opt.weights_init == "pretrained")
        self.models["encoder"].to(self.device)
        self.parameters_to_train += list(self.models["encoder"].parameters())

        self.models["depth"] = networks.DepthDecoder(
            self.models["encoder"].num_ch_enc, self.opt.scales)
        self.models["depth"].to(self.device)
        self.parameters_to_train += list(self.models["depth"].parameters())

        if self.use_pose_net:
            if self.opt.pose_model_type == "separate_resnet":
                self.models["pose_encoder"] = networks.ResnetEncoder(
                    self.opt.num_layers,
                    self.opt.weights_init == "pretrained",
                    num_input_images=self.num_pose_frames)

                self.models["pose_encoder"].to(self.device)
                self.parameters_to_train += list(self.models["pose_encoder"].parameters())

                self.models["pose"] = networks.PoseDecoder(
                    self.models["pose_encoder"].num_ch_enc,
                    num_input_features=1,
                    num_frames_to_predict_for=2)

            elif self.opt.pose_model_type == "shared":
                self.models["pose"] = networks.PoseDecoder(
                    self.models["encoder"].num_ch_enc, self.num_pose_frames)

            elif self.opt.pose_model_type == "posecnn":
                self.models["pose"] = networks.PoseCNN(
                    self.num_input_frames if self.opt.pose_model_input == "all" else 2)

            self.models["pose"].to(self.device)
            self.parameters_to_train += list(self.models["pose"].parameters())

        if self.opt.predictive_mask:
            # Our implementation of the predictive masking baseline has the same architecture
            # as our depth decoder. We predict a separate mask for each source frame.
            self.models["predictive_mask"] = networks.DepthDecoder(
                self.models["encoder"].num_ch_enc, self.opt.scales,
                num_output_channels=(len(self.opt.frame_ids) - 1))
            self.models["predictive_mask"].to(self.device)
            self.parameters_to_train += list(self.models["predictive_mask"].parameters())

        self.model_optimizer = optim.Adam(self.parameters_to_train, self.opt.learning_rate)
        self.model_lr_scheduler = optim.lr_scheduler.StepLR(
            self.model_optimizer, self.opt.scheduler_step_size, 0.1)
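        # StepLR multiplies the learning rate by 0.1 every scheduler_step_size epochs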

        if self.opt.load_weights_folder is not None:
            self.load_model()

        print("Training model named:\n  ", self.opt.model_name)
        print("Models and tensorboard events files are saved to:\n  ", self.opt.log_dir)
        print("Training is using:\n  ", self.device)

        # data
        datasets_dict = {'kitti': KITTIRAWDataset,
                         'kitti_odom': KITTIOdomDataset,
                         'FLIR': FlirDataset,
                         'KAIST': KAIST_Dataset}
        
        self.dataset = datasets_dict[self.opt.dataset]
        
        thermal = False
        if self.opt.dataset == 'FLIR':
            train_filenames = []

            train_files = os.listdir(os.path.join(self.opt.data_path, 'train/PreviewData/'))
            train_files.sort()
            train_filenames.extend(os.path.join(self.opt.data_path, 'train/PreviewData/') + 
                                   file for file in train_files[1:-1])

            video_files = os.listdir(os.path.join(self.opt.data_path, 'video/PreviewData/'))
            video_files.sort()
            train_filenames.extend(os.path.join(self.opt.data_path, 'video/PreviewData/') + 
                                   file for file in video_files[1:-1])

            val_filenames = []
            val_files = os.listdir(os.path.join(self.opt.data_path, 'valid/PreviewData/'))
            val_files.sort()
            val_filenames.extend(os.path.join(self.opt.data_path, 'valid/PreviewData/') + 
                                   file for file in val_files[1:-1])
            thermal = True 
        elif self.opt.dataset == 'KAIST':
            train_files = os.path.join(self.opt.data_path, 'training')
            train_filenames = []

            campus_train = os.listdir(os.path.join(train_files, 'Campus/THERMAL/'))
            campus_train.sort()
            residential_train = os.listdir(os.path.join(train_files, 'Residential/THERMAL/'))
            residential_train.sort()
            urban_train = os.listdir(os.path.join(train_files, 'Urban/THERMAL/'))
            urban_train.sort()

            train_filenames.extend(os.path.join(train_files, 'Campus/THERMAL/') +
                                   file for file in campus_train[1:-1])
            train_filenames.extend(os.path.join(train_files, 'Residential/THERMAL/') +
                                   file for file in residential_train[1:-1])
            train_filenames.extend(os.path.join(train_files, 'Urban/THERMAL/') + 
                                   file for file in urban_train[1:-1])
            
            val_files = os.path.join(self.opt.data_path, 'testing')
            val_filenames = []

            campus_val = os.listdir(os.path.join(val_files, 'Campus/THERMAL/'))
            campus_val.sort()
            residential_val = os.listdir(os.path.join(val_files, 'Residential/THERMAL/'))
            residential_val.sort()
            urban_val = os.listdir(os.path.join(val_files, 'Urban/THERMAL/'))
            urban_val.sort()

            val_filenames.extend(os.path.join(val_files, 'Campus/THERMAL/') + 
                                   file for file in campus_val[1:-1])
            val_filenames.extend(os.path.join(val_files, 'Residential/THERMAL/') + 
                                   file for file in residential_val[1:-1])
            val_filenames.extend(os.path.join(val_files, 'Urban/THERMAL/') + 
                                   file for file in urban_val[1:-1])
            thermal = True
        else:
            fpath = os.path.join(os.path.dirname(__file__), "splits", self.opt.split, "{}_files.txt")
            train_filenames = readlines(fpath.format("train"))
            val_filenames = readlines(fpath.format("val"))
            
        assert self.opt.img_ext in ('.png', '.jpg', '.jpeg'), \
            "Please provide a correct image extension"
        
        img_ext = self.opt.img_ext

        num_train_samples = len(train_filenames)
        self.num_total_steps = num_train_samples // self.opt.batch_size * self.opt.num_epochs

        train_dataset = self.dataset(
            self.opt.data_path, train_filenames, self.opt.height, self.opt.width,
            self.opt.frame_ids, 4, is_train=True, img_ext=img_ext, thermal=thermal)
        self.train_loader = DataLoader(
            train_dataset, self.opt.batch_size, True,
            num_workers=self.opt.num_workers, pin_memory=True, drop_last=True)
        val_dataset = self.dataset(
            self.opt.data_path, val_filenames, self.opt.height, self.opt.width,
            self.opt.frame_ids, 4, is_train=False, img_ext=img_ext, thermal=thermal)
        self.val_loader = DataLoader(
            val_dataset, self.opt.batch_size, True,
            num_workers=self.opt.num_workers, pin_memory=True, drop_last=True)
        self.val_iter = iter(self.val_loader)
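        # The iterator is advanced one batch at a time during training and must be
        # recreated once exhausted; a common pattern (the one monodepth2's val step uses) is:
        #     try:
        #         inputs = next(self.val_iter)
        #     except StopIteration:
        #         self.val_iter = iter(self.val_loader)
        #         inputs = next(self.val_iter)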

        # self.writers = {}
        # for mode in ["train", "val"]:
        #     self.writers[mode] = SummaryWriter(os.path.join(self.log_path, mode))

        if not self.opt.no_ssim:
            self.ssim = SSIM()
            self.ssim.to(self.device)

        self.backproject_depth = {}
        self.project_3d = {}
        for scale in self.opt.scales:
            h = self.opt.height // (2 ** scale)
            w = self.opt.width // (2 ** scale)

            self.backproject_depth[scale] = BackprojectDepth(self.opt.batch_size, h, w)
            self.backproject_depth[scale].to(self.device)

            self.project_3d[scale] = Project3D(self.opt.batch_size, h, w)
            self.project_3d[scale].to(self.device)

        self.depth_metric_names = [
            "de/abs_rel", "de/sq_rel", "de/rms", "de/log_rms", "da/a1", "da/a2", "da/a3"]

        if self.opt.dataset.startswith('kitti'):
            print("Using split:\n  ", self.opt.split)
        else:
            print("Using dataset:\n  ", self.opt.dataset)
        
        print("There are {:d} training items and {:d} validation items\n".format(
            len(train_dataset), len(val_dataset)))

        self.save_opts()
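
The FLIR and KAIST branches above build their file lists straight from sorted directory listings and slice off the boundary frames with [1:-1], so every remaining frame still has a previous and a next neighbour when frame_ids such as [-1, 0, 1] are used. A minimal sketch of that pattern (the make_file_list helper and the paths are illustrative, not part of the project):

    import os

    def make_file_list(root, sub_dir):
        """Sorted frame paths under root/sub_dir, without the boundary frames.

        Dropping the first and last entry guarantees that each listed frame
        has both temporal neighbours on disk.
        """
        folder = os.path.join(root, sub_dir)
        files = sorted(os.listdir(folder))
        return [os.path.join(folder, f) for f in files[1:-1]]

    # hypothetical usage mirroring the KAIST branch above
    train_filenames = []
    for scene in ("Campus", "Residential", "Urban"):
        train_filenames += make_file_list("/data/KAIST/training", scene + "/THERMAL")
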
Example #9
0
    def __init__(self, options):
        self.opt = options
        self.log_path = os.path.join(self.opt.log_dir, self.opt.model_name)
        # checking height and width are multiples of 32
        assert self.opt.height % 32 == 0, "'height' must be a multiple of 32"
        assert self.opt.width % 32 == 0, "'width' must be a multiple of 32"
        self.models = {}
        self.parameters_to_train = []

        self.device = torch.device("cpu" if self.opt.no_cuda else
                                   "cuda")  # pick the device, used together with .to(); must be set before any data is loaded

        self.num_scales = len(self.opt.scales)
        self.num_input_frames = len(self.opt.frame_ids)
        self.num_pose_frames = 2 if self.opt.pose_model_input == "pairs" else self.num_input_frames  # number of frames fed to the pose network

        assert self.opt.frame_ids[
            0] == 0, "frame_ids must start with 0"  # fail fast if the frame ids do not start at 0

        self.use_pose_net = not (
            self.opt.use_stereo and self.opt.frame_ids == [0]
        )  # use_stereo selects stereo training, otherwise monocular; pure stereo ([0] only) needs no pose network and no extra frames

        if self.opt.use_stereo:
            self.opt.frame_ids.append("s")  #加s表示是双目的  在最后的一位表示双目

        # set up the networks: encoder and decoders
        self.models["encoder"] = networks.ResnetEncoder(
            self.opt.num_layers, self.opt.weights_init == "pretrained",
            self.opt.BA2M, self.opt.CBAM, self.opt.BAM)
        self.models["encoder"].to(self.device)  # move to the device before loading any data
        self.parameters_to_train += list(
            self.models["encoder"].parameters())  # register the encoder parameters with the optimizer

        self.models["depth"] = networks.DepthDecoder(
            self.models["encoder"].num_ch_enc,
            self.opt.scales)  #num_ch_enc在哪里加进去的????
        self.models["depth"].to(self.device)
        self.parameters_to_train += list(
            self.models["depth"].parameters())  #获取网络的参数!!!!!

        # pose network
        if self.use_pose_net:
            if self.opt.pose_model_type == "separate_resnet":  # the pose encoder can be separate from or shared with the depth encoder
                self.models["pose_encoder"] = networks.ResnetEncoder(
                    self.opt.num_layers,
                    self.opt.weights_init == "pretrained",
                    self.opt.BA2M,
                    self.opt.CBAM,
                    self.opt.BAM,
                    num_input_images=self.num_pose_frames)

                self.models["pose_encoder"].to(self.device)
                self.parameters_to_train += list(
                    self.models["pose_encoder"].parameters())  # register the pose encoder parameters

                self.models["pose"] = networks.PoseDecoder(
                    self.models["pose_encoder"].num_ch_enc,
                    num_input_features=1,
                    num_frames_to_predict_for=2)
                # note: the pose decoder parameters are not registered at this point...
            elif self.opt.pose_model_type == "shared":
                self.models["pose"] = networks.PoseDecoder(
                    self.models["encoder"].num_ch_enc, self.num_pose_frames)

            elif self.opt.pose_model_type == "posecnn":
                self.models["pose"] = networks.PoseCNN(
                    self.num_input_frames if self.opt.pose_model_input ==
                    "all" else 2)

            self.models["pose"].to(self.device)
            self.parameters_to_train += list(self.models["pose"].parameters())
            # ...because they are registered here, once the decoder type has been chosen

        # predictive-mask baseline (used instead of the paper's auto-masking)
        if self.opt.predictive_mask:
            assert self.opt.disable_automasking, \
                "When using predictive_mask, please disable automasking with --disable_automasking"

            # Our implementation of the predictive masking baseline has the same architecture
            # as our depth decoder. We predict a separate mask for each source frame.
            self.models["predictive_mask"] = networks.DepthDecoder(
                self.models["encoder"].num_ch_enc,
                self.opt.scales,
                num_output_channels=(len(self.opt.frame_ids) - 1))
            self.models["predictive_mask"].to(self.device)
            self.parameters_to_train += list(
                self.models["predictive_mask"].parameters())

        # optimizer and learning-rate schedule
        self.model_optimizer = optim.Adam(self.parameters_to_train,
                                          self.opt.learning_rate)
        self.model_lr_scheduler = optim.lr_scheduler.StepLR(
            self.model_optimizer, self.opt.scheduler_step_size, 0.1
        )  # StepLR: new_lr = 0.1 * lr every scheduler_step_size epochs

        # load pretrained weights if a folder was given
        if self.opt.load_weights_folder is not None:
            self.load_model()

        print("Training model named:\n  ", self.opt.model_name)
        print("Models and tensorboard events files are saved to:\n  ",
              self.opt.log_dir)
        print("Training is using:\n  ", self.device)

        # data部分
        datasets_dict = {
            "kitti": datasets.KITTIRAWDataset,
            "kitti_odom": datasets.KITTIOdomDataset
        }
        self.dataset = datasets_dict[self.opt.dataset]  # e.g. KITTIRAWDataset

        fpath = os.path.join(os.path.dirname(__file__), "splits",
                             self.opt.split, "{}_files.txt")
        # os.path.dirname(__file__) is the directory of this script;
        # os.path.join glues the path components together, and the chosen
        # split determines which file lists are used for training and validation

        train_filenames = readlines(
            fpath.format("train"))  # .format fills the {} placeholder in fpath
        val_filenames = readlines(fpath.format("val"))
        img_ext = '.png' if self.opt.png else '.jpg'

        # the sample filenames used for training and validation

        num_train_samples = len(train_filenames)  # total number of training samples
        self.num_total_steps = num_train_samples // self.opt.batch_size * self.opt.num_epochs  # total optimization steps; the parameters are updated once per batch
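        # e.g. 12,000 samples with batch_size 12 over 20 epochs -> (12000 // 12) * 20 = 20,000 steps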

        train_dataset = self.dataset(
            self.opt.data_path,
            train_filenames,
            self.opt.height,
            self.opt.width,  # these arguments initialize the MonoDataset base class
            self.opt.frame_ids,
            4,
            is_train=True,
            img_ext=img_ext)  # the KITTI dataset classes inherit from MonoDataset
        self.train_loader = DataLoader(
            train_dataset,
            self.opt.batch_size,
            True,
            num_workers=self.opt.num_workers,
            pin_memory=True,
            drop_last=True)  # pin_memory keeps batches in page-locked host memory, so host-to-GPU copies are faster
        # DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=N):
        # shuffle controls whether the data are reshuffled each epoch;
        # num_workers sets the number of loader worker processes (0 loads in the main process)
        val_dataset = self.dataset(self.opt.data_path,
                                   val_filenames,
                                   self.opt.height,
                                   self.opt.width,
                                   self.opt.frame_ids,
                                   4,
                                   is_train=False,
                                   img_ext=img_ext)
        self.val_loader = DataLoader(  # validation loader
            val_dataset,
            self.opt.batch_size,
            True,
            num_workers=self.opt.num_workers,
            pin_memory=True,
            drop_last=True)
        self.val_iter = iter(self.val_loader)  # an iterator over the val loader, advanced one batch at a time during training

        self.writers = {}
        for mode in ["train", "val"]:
            self.writers[mode] = SummaryWriter(
                os.path.join(self.log_path, mode))

        if not self.opt.no_ssim:
            self.ssim = SSIM()  # SSIM is defined in the layers module
            self.ssim.to(self.device)

        self.backproject_depth = {}
        self.project_3d = {}
        for scale in self.opt.scales:
            h = self.opt.height // (2**scale)
            w = self.opt.width // (2**scale)

            self.backproject_depth[scale] = BackprojectDepth(
                self.opt.batch_size, h, w)  # BackprojectDepth lifts a depth map into a 3D point cloud
            self.backproject_depth[scale].to(self.device)

            self.project_3d[scale] = Project3D(self.opt.batch_size, h, w)
            self.project_3d[scale].to(self.device)

        self.depth_metric_names = [
            "de/abs_rel", "de/sq_rel", "de/rms", "de/log_rms", "da/a1",
            "da/a2", "da/a3"
        ]

        print("Using split:\n  ", self.opt.split)
        print(
            "There are {:d} training items and {:d} validation items\n".format(
                len(train_dataset), len(val_dataset)))

        self.save_opts()  # save the options used for this run
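
The scheduler comment above is easy to verify in isolation; a self-contained sketch (the lr and step_size values are illustrative, matching monodepth2-style defaults):

    import torch
    from torch import optim

    params = [torch.nn.Parameter(torch.zeros(1))]
    optimizer = optim.Adam(params, lr=1e-4)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=15, gamma=0.1)

    for epoch in range(20):
        optimizer.step()   # one epoch of training, elided
        scheduler.step()   # advance the schedule once per epoch
        print(epoch, optimizer.param_groups[0]["lr"])
    # the lr stays at 1e-4 for the first 14 prints and is 1e-5 from the 15th onward
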
Example #10
0
File: trainer.py Project: rnett/monodepth2
    def __init__(self, options):
        self.opt = options

        self.log_path = os.path.join(self.opt.log_dir, self.opt.model_name)

        Path(self.log_path).mkdir(exist_ok=True, parents=True)
        (Path(self.log_path) / "command").write_text(" ".join(sys.argv))  # record the launch command; write_text also closes the file

        # checking height and width are multiples of 32
        # assert self.opt.height % 32 == 0, "'height' must be a multiple of 32"
        # assert self.opt.width % 32 == 0, "'width' must be a multiple of 32"

        self.models = {}
        self.parameters_to_train = []

        self.device = torch.device("cpu" if self.opt.no_cuda else "cuda")
        self.parallel = not self.opt.no_cuda and torch.cuda.device_count() > 1

        if self.parallel and self.opt.mode is Mode.Cubemap:
            assert self.opt.batch_size % torch.cuda.device_count() == 0, f"Cubemap batch size ({self.opt.batch_size})" \
                                                                         f" must be evenly divisible by the number of" \
                                                                         f" GPUs ({torch.cuda.device_count()})"

        self.num_scales = len(self.opt.scales)
        self.num_input_frames = len(self.opt.frame_ids)
        self.num_pose_frames = 2 if self.opt.pose_model_input == "pairs" else self.num_input_frames

        assert self.opt.frame_ids[0] == 0, "frame_ids must start with 0"

        self.use_pose_net = not (self.opt.use_stereo
                                 and self.opt.frame_ids == [0])

        if self.opt.use_stereo:
            self.opt.frame_ids.append("s")

        conv_layer, data_lambda, intrinsics = get_params(options)
        self.intrinsics = intrinsics

        self.height = self.opt.height or self.intrinsics.height
        self.width = self.opt.width or self.intrinsics.width

        self.models["encoder"] = networks.ResnetEncoder(
            conv_layer, self.opt.num_layers,
            self.opt.weights_init == "pretrained")
        self.store_model("encoder")

        self.models["depth"] = networks.DepthDecoder(
            conv_layer, self.get_num_ch_enc(self.models["encoder"]),
            self.opt.scales)
        self.store_model("depth")

        if self.use_pose_net:  # true
            if self.opt.pose_model_type == "separate_resnet":  # true
                self.models["pose_encoder"] = networks.ResnetEncoder(
                    conv_layer,
                    self.opt.num_layers,
                    self.opt.weights_init == "pretrained",
                    num_input_images=self.num_pose_frames)
                self.store_model("pose_encoder")

                self.models["pose"] = networks.PoseDecoder(
                    conv_layer,
                    self.get_num_ch_enc(self.models["pose_encoder"]),
                    num_input_features=1,
                    num_frames_to_predict_for=2)

            elif self.opt.pose_model_type == "shared":
                self.models["pose"] = networks.PoseDecoder(
                    conv_layer, self.get_num_ch_enc(self.models["encoder"]),
                    self.num_pose_frames)

            elif self.opt.pose_model_type == "posecnn":
                self.models["pose"] = networks.PoseCNN(
                    conv_layer, self.num_input_frames
                    if self.opt.pose_model_input == "all" else 2)

            self.store_model("pose")

        if self.opt.predictive_mask:  # false
            assert self.opt.disable_automasking, \
                "When using predictive_mask, please disable automasking with --disable_automasking"

            # Our implementation of the predictive masking baseline has the same architecture
            # as our depth decoder. We predict a separate mask for each source frame.
            self.models["predictive_mask"] = networks.DepthDecoder(
                conv_layer,
                self.get_num_ch_enc(self.models["encoder"]),
                self.opt.scales,
                num_output_channels=(len(self.opt.frame_ids) - 1))
            self.store_model("predictive_mask")

        self.model_optimizer = optim.Adam(self.parameters_to_train,
                                          self.opt.learning_rate)
        self.model_lr_scheduler = optim.lr_scheduler.StepLR(
            self.model_optimizer, self.opt.scheduler_step_size, 0.1)

        if self.opt.load_weights_folder is not None:
            self.load_model()

        print("Training model named:\n  ", self.opt.model_name)
        print("Models and tensorboard events files are saved to:\n  ",
              self.opt.log_dir)
        print(
            "Training is using:\n  ", f"{self.device}" +
            (f" on {torch.cuda.device_count()} GPUs" if self.parallel else ""))

        num_train_samples = len(load_csv(options.train_data)) * 1000
        self.num_total_steps = num_train_samples // self.opt.batch_size * self.opt.num_epochs

        train_dataset, val_dataset = get_datasets(options, data_lambda,
                                                  intrinsics)

        self.train_loader = DataLoader(train_dataset,
                                       self.opt.batch_size,
                                       True,
                                       num_workers=self.opt.num_workers,
                                       pin_memory=True,
                                       drop_last=True)
        self.val_loader = DataLoader(val_dataset,
                                     self.opt.batch_size,
                                     True,
                                     num_workers=self.opt.num_workers,
                                     pin_memory=True,
                                     drop_last=True)
        self.val_iter = iter(self.val_loader)

        self.writers = {}
        for mode in ["train", "val"]:
            self.writers[mode] = SummaryWriter(
                os.path.join(self.log_path, mode))

        if not self.opt.no_ssim:
            self.ssim = self.wrap_model(SSIM())  # TODO can I parallelize?
            self.ssim.to(self.device)

        self.backproject_depth = {}
        self.project_3d = {}
        for scale in self.opt.scales:
            h = self.height // (2**scale)
            w = self.width // (2**scale)

            # TODO should be able to parallelize
            self.backproject_depth[scale] = BackprojectDepth(
                self.opt.batch_size, h, w, options.mode)
            self.backproject_depth[scale].to(self.device)

            self.project_3d[scale] = Project3D(self.opt.batch_size, h, w,
                                               options.mode)
            self.project_3d[scale].to(self.device)

        if options.mode is Mode.Cubemap:
            self.models["cube_pose_and_loss"] = self.wrap_model(
                CubePosesAndLoss())
            self.models["cube_pose_and_loss"].to(self.device)

        self.depth_metric_names = [
            "de/abs_rel", "de/sq_rel", "de/rms", "de/log_rms", "da/a1",
            "da/a2", "da/a3"
        ]

        self.train_items = len(train_dataset)
        self.val_items = len(val_dataset)

        print("Using split:\n  ", self.opt.split)
        print(
            "There are {:d} training items and {:d} validation items\n".format(
                self.train_items, self.val_items))

        self.save_opts()
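
All three trainers build one BackprojectDepth/Project3D pair per scale, halving the resolution at each level. A quick sketch of the resulting sizes (the 192x640 input and scales [0, 1, 2, 3] are typical monodepth2 defaults, used here only for illustration):

    height, width = 192, 640          # illustrative input resolution
    for scale in (0, 1, 2, 3):
        h = height // (2 ** scale)    # same computation as in the loops above
        w = width // (2 ** scale)
        print(scale, h, w)
    # prints: 0 192 640 / 1 96 320 / 2 48 160 / 3 24 80

This is also why the trainers assert that height and width are multiples of 32: with four halvings plus the encoder's own downsampling, every intermediate feature map keeps integer dimensions.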