def __init__(self, options):
    """Build models, optimizer, dataloaders and projection helpers for training.

    Args:
        options: parsed command-line options namespace (image size, paths,
            pose-model settings, training hyper-parameters, ...).
    """
    self.opt = options
    self.log_path = os.path.join(self.opt.log_dir, self.opt.model_name)

    # checking height and width are multiples of 32 (encoder downsamples 5x)
    assert self.opt.height % 32 == 0, "'height' must be a multiple of 32"
    assert self.opt.width % 32 == 0, "'width' must be a multiple of 32"

    self.models = {}               # all sub-networks, keyed by name
    self.parameters_to_train = []  # flat parameter list handed to the optimizer

    self.device = torch.device("cpu" if self.opt.no_cuda else "cuda")

    self.num_scales = len(self.opt.scales)
    self.num_input_frames = len(self.opt.frame_ids)
    # "pairs" feeds the pose net two frames at a time; otherwise all input frames
    self.num_pose_frames = 2 if self.opt.pose_model_input == "pairs" else self.num_input_frames

    assert self.opt.frame_ids[0] == 0, "frame_ids must start with 0"

    # a pose network is unnecessary only for pure stereo training on frame 0
    self.use_pose_net = not (self.opt.use_stereo and self.opt.frame_ids == [0])

    if self.opt.use_stereo:
        # NOTE: mutates the shared options object; "s" marks the stereo frame
        self.opt.frame_ids.append("s")

    # depth encoder + decoder
    self.models["encoder"] = networks.ResnetEncoder(
        self.opt.num_layers, self.opt.weights_init == "pretrained")
    self.models["encoder"].to(self.device)
    self.parameters_to_train += list(self.models["encoder"].parameters())

    self.models["depth"] = networks.DepthDecoder(
        self.models["encoder"].num_ch_enc, self.opt.scales)
    self.models["depth"].to(self.device)
    self.parameters_to_train += list(self.models["depth"].parameters())

    if self.use_pose_net:
        if self.opt.pose_model_type == "separate_resnet":
            # pose encoder has its own weights, separate from the depth encoder
            self.models["pose_encoder"] = networks.ResnetEncoder(
                self.opt.num_layers,
                self.opt.weights_init == "pretrained",
                num_input_images=self.num_pose_frames)
            self.models["pose_encoder"].to(self.device)
            self.parameters_to_train += list(
                self.models["pose_encoder"].parameters())
            self.models["pose"] = networks.PoseDecoder(
                self.models["pose_encoder"].num_ch_enc,
                num_input_features=1,
                num_frames_to_predict_for=2)
        elif self.opt.pose_model_type == "shared":
            # reuses the depth encoder's features
            self.models["pose"] = networks.PoseDecoder(
                self.models["encoder"].num_ch_enc, self.num_pose_frames)
        elif self.opt.pose_model_type == "posecnn":
            self.models["pose"] = networks.PoseCNN(
                self.num_input_frames if self.opt.pose_model_input == "all" else 2)
        self.models["pose"].to(self.device)
        self.parameters_to_train += list(self.models["pose"].parameters())

    if self.opt.predictive_mask:
        assert self.opt.disable_automasking, \
            "When using predictive_mask, please disable automasking with --disable_automasking"
        # Our implementation of the predictive masking baseline has the same architecture
        # as our depth decoder. We predict a separate mask for each source frame.
        self.models["predictive_mask"] = networks.DepthDecoder(
            self.models["encoder"].num_ch_enc, self.opt.scales,
            num_output_channels=(len(self.opt.frame_ids) - 1))
        self.models["predictive_mask"].to(self.device)
        self.parameters_to_train += list(
            self.models["predictive_mask"].parameters())

    self.model_optimizer = optim.Adam(self.parameters_to_train, self.opt.learning_rate)
    # decay the LR by 10x every scheduler_step_size epochs
    self.model_lr_scheduler = optim.lr_scheduler.StepLR(
        self.model_optimizer, self.opt.scheduler_step_size, 0.1)

    if self.opt.load_weights_folder is not None:
        self.load_model()

    print("Training model named:\n ", self.opt.model_name)
    print("Models and tensorboard events files are saved to:\n ", self.opt.log_dir)
    print("Training is using:\n ", self.device)

    # data
    self.dataset = datasets.InteriorDataset

    fpath = os.path.join(os.path.dirname(__file__), "splits", self.opt.split, "{}_files.txt")

    train_filenames = readlines(fpath.format("train"))
    val_filenames = readlines(fpath.format("val"))
    img_ext = '.png'

    num_train_samples = len(train_filenames)
    self.num_total_steps = num_train_samples // self.opt.batch_size * self.opt.num_epochs

    # 4 = number of scales the dataset pre-computes images for
    train_dataset = self.dataset(
        self.opt.data_path, train_filenames, self.opt.height, self.opt.width,
        self.opt.frame_ids, 4, is_train=True, img_ext=img_ext)
    self.train_loader = DataLoader(
        train_dataset, self.opt.batch_size, True,
        num_workers=self.opt.num_workers, pin_memory=True, drop_last=True)
    val_dataset = self.dataset(
        self.opt.data_path, val_filenames, self.opt.height, self.opt.width,
        self.opt.frame_ids, 4, is_train=False, img_ext=img_ext)
    self.val_loader = DataLoader(
        val_dataset, self.opt.batch_size, True,
        num_workers=self.opt.num_workers, pin_memory=True, drop_last=True)
    self.val_iter = iter(self.val_loader)

    # one tensorboard writer per phase
    self.writers = {}
    for mode in ["train", "val"]:
        self.writers[mode] = SummaryWriter(
            os.path.join(self.log_path, mode))

    if not self.opt.no_ssim:
        self.ssim = SSIM()
        self.ssim.to(self.device)

    # per-scale reprojection helpers
    self.backproject_depth = {}
    self.project_3d = {}
    for scale in self.opt.scales:
        h = self.opt.height // (2**scale)
        w = self.opt.width // (2**scale)

        self.backproject_depth[scale] = BackprojectDepth(
            self.opt.batch_size, h, w)
        self.backproject_depth[scale].to(self.device)

        self.project_3d[scale] = Project3D(self.opt.batch_size, h, w)
        self.project_3d[scale].to(self.device)

    self.depth_metric_names = [
        "de/abs_rel", "de/sq_rel", "de/rms", "de/log_rms", "da/a1", "da/a2", "da/a3"
    ]

    print("Using split:\n ", self.opt.split)
    print(
        "There are {:d} training items and {:d} validation items\n".format(
            len(train_dataset), len(val_dataset)))

    self.save_opts()
def __init__(self, options):
    """Build models, optimizer and dataloaders for thermal/RGB depth training.

    Supports single datasets (KITTI, FLIR, KAIST, CREOL) or a concatenation of
    all thermal datasets when ``options.dataset == 'all_thermal_data'``.

    Args:
        options: parsed command-line options namespace.
    """
    self.opt = options
    self.log_path = os.path.join(self.opt.log_dir, self.opt.model_name)

    # checking height and width are multiples of 32 (encoder downsamples 5x)
    assert self.opt.height % 32 == 0, "'height' must be a multiple of 32"
    assert self.opt.width % 32 == 0, "'width' must be a multiple of 32"

    self.models = {}               # all sub-networks, keyed by name
    self.parameters_to_train = []  # flat parameter list handed to the optimizer

    self.device = torch.device("cpu" if self.opt.no_cuda else "cuda")

    self.num_scales = len(self.opt.scales)
    self.num_input_frames = len(self.opt.frame_ids)
    # "pairs" feeds the pose net two frames at a time; otherwise all input frames
    self.num_pose_frames = 2 if self.opt.pose_model_input == "pairs" else self.num_input_frames

    assert self.opt.frame_ids[0] == 0, "frame_ids must start with 0"

    # a pose network is unnecessary only for pure stereo training on frame 0
    self.use_pose_net = not (self.opt.use_stereo and self.opt.frame_ids == [0])

    if self.opt.use_stereo:
        # NOTE: mutates the shared options object; "s" marks the stereo frame
        self.opt.frame_ids.append("s")

    # depth encoder + decoder
    self.models["encoder"] = networks.ResnetEncoder(
        self.opt.num_layers, self.opt.weights_init == "pretrained")
    self.models["encoder"].to(self.device)
    self.parameters_to_train += list(self.models["encoder"].parameters())

    self.models["depth"] = networks.DepthDecoder(
        self.models["encoder"].num_ch_enc, self.opt.scales)
    self.models["depth"].to(self.device)
    self.parameters_to_train += list(self.models["depth"].parameters())

    if self.use_pose_net:
        if self.opt.pose_model_type == "separate_resnet":
            # pose encoder has its own weights, separate from the depth encoder
            self.models["pose_encoder"] = networks.ResnetEncoder(
                self.opt.num_layers,
                self.opt.weights_init == "pretrained",
                num_input_images=self.num_pose_frames)
            self.models["pose_encoder"].to(self.device)
            self.parameters_to_train += list(self.models["pose_encoder"].parameters())

            self.models["pose"] = networks.PoseDecoder(
                self.models["pose_encoder"].num_ch_enc,
                num_input_features=1,
                num_frames_to_predict_for=2)
        elif self.opt.pose_model_type == "shared":
            # reuses the depth encoder's features
            self.models["pose"] = networks.PoseDecoder(
                self.models["encoder"].num_ch_enc, self.num_pose_frames)
        elif self.opt.pose_model_type == "posecnn":
            self.models["pose"] = networks.PoseCNN(
                self.num_input_frames if self.opt.pose_model_input == "all" else 2)
        self.models["pose"].to(self.device)
        self.parameters_to_train += list(self.models["pose"].parameters())

    if self.opt.predictive_mask:
        # Our implementation of the predictive masking baseline has the same architecture
        # as our depth decoder. We predict a separate mask for each source frame.
        self.models["predictive_mask"] = networks.DepthDecoder(
            self.models["encoder"].num_ch_enc, self.opt.scales,
            num_output_channels=(len(self.opt.frame_ids) - 1))
        self.models["predictive_mask"].to(self.device)
        self.parameters_to_train += list(self.models["predictive_mask"].parameters())

    self.model_optimizer = optim.Adam(self.parameters_to_train, self.opt.learning_rate)
    # decay the LR by 10x every scheduler_step_size epochs
    self.model_lr_scheduler = optim.lr_scheduler.StepLR(
        self.model_optimizer, self.opt.scheduler_step_size, 0.1)

    if self.opt.load_weights_folder is not None:
        self.load_model()

    print("Training model named:\n ", self.opt.model_name)
    print("Models and tensorboard events files are saved to:\n ", self.opt.log_dir)
    print("Training is using:\n ", self.device)

    # dataset options
    # 'all_thermal_data' maps to a LIST of dataset classes (handled below)
    datasets_dict = {'kitti': KITTIRAWDataset,
                     'kitti_odom': KITTIOdomDataset,
                     'FLIR': FlirDataset,
                     'KAIST': KAIST_Dataset,
                     'CREOL': CreolDataset,
                     'all_thermal_data': [FlirDataset, KAIST_Dataset, CreolDataset]}
    assert (self.opt.img_ext == '.png') or (self.opt.img_ext == '.jpg') or (
        self.opt.img_ext == '.jpeg'), "Please provide a correct image extension"
    img_ext = self.opt.img_ext

    self.dataset = datasets_dict[self.opt.dataset]

    if self.opt.dataset != 'all_thermal_data':
        # single dataset: build one train/val pair of loaders
        train_filenames, val_filenames, thermal = get_filenames(self.opt.dataset,
                                                                self.opt.data_path,
                                                                self.opt.split)
        num_train_samples = len(train_filenames)
        num_val_samples = len(val_filenames)
        self.num_total_steps = num_train_samples // self.opt.batch_size * self.opt.num_epochs

        train_dataset = self.dataset(
            self.opt.data_path, train_filenames, self.opt.height, self.opt.width,
            self.opt.frame_ids, 4, is_train=True, img_ext=img_ext, thermal=thermal)
        self.train_loader = DataLoader(
            train_dataset, self.opt.batch_size, True,
            num_workers=self.opt.num_workers, pin_memory=True, drop_last=True)
        val_dataset = self.dataset(
            self.opt.data_path, val_filenames, self.opt.height, self.opt.width,
            self.opt.frame_ids, 4, is_train=False, img_ext=img_ext, thermal = thermal)
        self.val_loader = DataLoader(
            val_dataset, self.opt.batch_size, True,
            num_workers=self.opt.num_workers, pin_memory=True, drop_last=True)
        self.val_iter = iter(self.val_loader)
    else:
        # all thermal datasets concatenated; NOTE: data paths are hard-coded here
        datasets = ['FLIR', 'KAIST', 'CREOL']
        data_paths = ['/groups/mshah/data/FLIR/pre_dat/',
                      '/groups/mshah/data/KAIST_multispectral/',
                      '../robert_video/']
        train_datasets = []
        val_datasets = []
        num_train_samples = 0
        num_val_samples = 0
        for i, dataset in enumerate(self.dataset):
            train_filenames, val_filenames, thermal = get_filenames(datasets[i],
                                                                    data_paths[i],
                                                                    self.opt.split)
            print(datasets[i] + ' train: ' + data_paths[i] + ' - ' + str(len(train_filenames)))
            print(datasets[i] + ' val: ' + data_paths[i] + ' - ' + str(len(val_filenames)))
            num_train_samples += len(train_filenames)
            num_val_samples += len(val_filenames)
            train_datasets.append(dataset(
                data_paths[i], train_filenames, self.opt.height, self.opt.width,
                self.opt.frame_ids, 4, is_train=True, img_ext=img_ext, thermal=thermal))
            val_datasets.append(dataset(
                data_paths[i], val_filenames, self.opt.height, self.opt.width,
                self.opt.frame_ids, 4, is_train=False, img_ext=img_ext, thermal=thermal))
        self.num_total_steps = num_train_samples // self.opt.batch_size * self.opt.num_epochs
        self.train_loader = DataLoader(
            ConcatDataset(train_datasets), self.opt.batch_size, True,
            num_workers=self.opt.num_workers, pin_memory=True, drop_last=True)
        self.val_loader = DataLoader(
            ConcatDataset(val_datasets), self.opt.batch_size, True,
            num_workers=self.opt.num_workers, pin_memory=True, drop_last=True)
        self.val_iter = iter(self.val_loader)

    # tensorboard writers intentionally disabled in this variant
    # self.writers = {}
    # for mode in ["train", "val"]:
    #     self.writers[mode] = SummaryWriter(os.path.join(self.log_path, mode))

    if not self.opt.no_ssim:
        self.ssim = SSIM()
        self.ssim.to(self.device)

    # per-scale reprojection helpers
    self.backproject_depth = {}
    self.project_3d = {}
    for scale in self.opt.scales:
        h = self.opt.height // (2 ** scale)
        w = self.opt.width // (2 ** scale)

        self.backproject_depth[scale] = BackprojectDepth(self.opt.batch_size, h, w)
        self.backproject_depth[scale].to(self.device)

        self.project_3d[scale] = Project3D(self.opt.batch_size, h, w)
        self.project_3d[scale].to(self.device)

    self.depth_metric_names = [
        "de/abs_rel", "de/sq_rel", "de/rms", "de/log_rms", "da/a1", "da/a2", "da/a3"]

    if self.opt.dataset.startswith('kitti'):
        print("Using split:\n ", self.opt.split)
    else:
        print("Using dataset:\n ", self.opt.dataset)
    print("There are {:d} training items and {:d} validation items\n".format(
        num_train_samples, num_val_samples))

    self.save_opts()
def __init__(self, options):
    """Build models, optimizer and dataloaders for depth training with an
    optional frozen refinement stage.

    Args:
        options: parsed command-line options namespace; ``refine`` /
            ``inv_refine`` enable the refinement network, ``crop_mode``
            selects the crop strategy passed to datasets and the refiner.
    """
    self.opt = options
    self.refine = options.refine or options.inv_refine
    self.log_path = os.path.join(self.opt.log_dir, self.opt.model_name)
    self.crop_mode = options.crop_mode

    # checking height and width are multiples of 32 (encoder downsamples 5x)
    assert self.opt.height % 32 == 0, "'height' must be a multiple of 32"
    assert self.opt.width % 32 == 0, "'width' must be a multiple of 32"

    self.models = {}                      # all sub-networks, keyed by name
    self.parameters_to_train = []         # parameters handed to the optimizer
    self.parameters_to_train_refine = []  # reserved for refine-stage params

    self.device = torch.device("cpu" if self.opt.no_cuda else "cuda")

    self.num_scales = len(self.opt.scales)
    self.num_input_frames = len(self.opt.frame_ids)
    # "pairs" feeds the pose net two frames at a time; otherwise all input frames
    self.num_pose_frames = 2 if self.opt.pose_model_input == "pairs" else self.num_input_frames

    assert self.opt.frame_ids[0] == 0, "frame_ids must start with 0"

    # a pose network is unnecessary only for pure stereo training on frame 0
    self.use_pose_net = not (self.opt.use_stereo and self.opt.frame_ids == [0])

    if self.opt.use_stereo:
        # NOTE: mutates the shared options object; "s" marks the stereo frame
        self.opt.frame_ids.append("s")

    if self.refine:
        self.refine_stage = list(range(options.refine_stage))
        # per-stage crop sizes; the 5-stage schedule adds one extra crop level
        if len(self.refine_stage) > 4:
            self.crop_h = [96, 128, 160, 192, 192]
            self.crop_w = [192, 256, 384, 448, 640]
        else:
            self.crop_h = [96, 128, 160, 192]
            self.crop_w = [192, 256, 384, 640]
        if self.opt.refine_model == 's':
            self.models["mid_refine"] = networks.Simple_Propagate(
                self.crop_h, self.crop_w, self.crop_mode)
        elif self.opt.refine_model == 'i':
            self.models["mid_refine"] = networks.Iterative_Propagate_old(
                self.crop_h, self.crop_w, self.crop_mode)
        # freeze the refinement network
        # BUGFIX: was `param.requeires_grad` (typo), which created a junk
        # attribute and left these parameters trainable.
        for param in self.models["mid_refine"].parameters():
            param.requires_grad = False
        self.models["mid_refine"].to(self.device)

    # depth encoder + decoder
    self.models["encoder"] = networks.ResnetEncoder(
        self.opt.num_layers, self.opt.weights_init == "pretrained", num_input_images=1)
    self.models["encoder"].to(self.device)
    self.parameters_to_train += list(self.models["encoder"].parameters())

    self.models["depth"] = networks.DepthDecoder(
        self.models["encoder"].num_ch_enc, self.opt.scales, refine=self.refine)
    self.models["depth"].to(self.device)
    self.parameters_to_train += list(self.models["depth"].parameters())

    if self.use_pose_net:
        if self.opt.pose_model_type == "separate_resnet":
            # pose encoder has its own weights, separate from the depth encoder
            self.models["pose_encoder"] = networks.ResnetEncoder(
                self.opt.num_layers,
                self.opt.weights_init == "pretrained",
                num_input_images=self.num_pose_frames)
            self.models["pose_encoder"].to(self.device)
            self.parameters_to_train += list(
                self.models["pose_encoder"].parameters())
            self.models["pose"] = networks.PoseDecoder(
                self.models["pose_encoder"].num_ch_enc,
                num_input_features=1,
                num_frames_to_predict_for=2)
        elif self.opt.pose_model_type == "shared":
            # reuses the depth encoder's features
            self.models["pose"] = networks.PoseDecoder(
                self.models["encoder"].num_ch_enc, self.num_pose_frames)
        elif self.opt.pose_model_type == "posecnn":
            self.models["pose"] = networks.PoseCNN(
                self.num_input_frames if self.opt.pose_model_input == "all" else 2)
        self.models["pose"].to(self.device)
        self.parameters_to_train += list(self.models["pose"].parameters())

    parameters_to_train = self.parameters_to_train
    self.model_optimizer = optim.Adam(parameters_to_train, self.opt.learning_rate)
    # decay the LR by 10x every scheduler_step_size epochs
    self.model_lr_scheduler = optim.lr_scheduler.StepLR(
        self.model_optimizer, self.opt.scheduler_step_size, 0.1)

    if self.opt.load_weights_folder is not None:
        self.load_model()

    if self.refine:
        # frozen copies of the (possibly just-loaded) encoder/decoder, used as
        # fixed teachers for the refinement stage
        # BUGFIX: `requires_grad` was misspelled `requeires_grad` here too, so
        # the "nograd" copies actually accumulated gradients.
        self.models["encoder_nograd"] = copy.deepcopy(self.models["encoder"])
        for param in self.models["encoder_nograd"].parameters():
            param.requires_grad = False
        self.models["encoder_nograd"].to(self.device)
        self.models["depth_nograd"] = copy.deepcopy(self.models["depth"])
        for param in self.models["depth_nograd"].parameters():
            param.requires_grad = False
        self.models["depth_nograd"].to(self.device)

    print("Training model named:\n ", self.opt.model_name)
    print("Models and tensorboard events files are saved to:\n ", self.opt.log_dir)
    print("Training is using:\n ", self.device)

    # data
    datasets_dict = {
        "kitti": datasets.KITTIRAWDataset,
        "kitti_odom": datasets.KITTIOdomDataset,
        "kitti_depth": datasets.KITTIDepthDataset
    }
    self.dataset = datasets_dict[self.opt.dataset]

    fpath = os.path.join(os.path.dirname(__file__), "splits", self.opt.split, "{}_files_p.txt")

    train_filenames = readlines(fpath.format("train"))
    val_filenames = readlines(fpath.format("val"))
    img_ext = '.png' if self.opt.png else '.jpg'

    num_train_samples = len(train_filenames)
    self.num_total_steps = num_train_samples // self.opt.batch_size * self.opt.num_epochs

    # 4 = number of scales the dataset pre-computes images for
    train_dataset = self.dataset(
        self.opt.data_path, train_filenames, self.opt.height, self.opt.width,
        self.opt.frame_ids, 4, is_train=True, img_ext=img_ext,
        refine=False, crop_mode=self.crop_mode)
    self.train_loader = DataLoader(
        train_dataset, self.opt.batch_size, True,
        num_workers=self.opt.num_workers, pin_memory=True, drop_last=True)
    val_dataset = self.dataset(
        self.opt.data_path, val_filenames, self.opt.height, self.opt.width,
        self.opt.frame_ids, 4, is_train=False, img_ext=img_ext,
        refine=False, crop_mode=self.crop_mode)
    self.val_loader = DataLoader(
        val_dataset, self.opt.batch_size, True,
        num_workers=self.opt.num_workers, pin_memory=True, drop_last=True)
    self.val_iter = iter(self.val_loader)

    # one tensorboard writer per phase
    self.writers = {}
    for mode in ["train", "val"]:
        self.writers[mode] = SummaryWriter(
            os.path.join(self.log_path, mode))

    if not self.opt.no_ssim:
        self.ssim = SSIM()
        self.ssim.to(self.device)

    # per-scale reprojection helpers
    self.backproject_depth = {}
    self.project_3d = {}
    for scale in self.opt.scales:
        h = self.opt.height // (2**scale)
        w = self.opt.width // (2**scale)

        self.backproject_depth[scale] = BackprojectDepth(
            self.opt.batch_size, h, w)
        self.backproject_depth[scale].to(self.device)

        self.project_3d[scale] = Project3D(self.opt.batch_size, h, w)
        self.project_3d[scale].to(self.device)

    self.depth_metric_names = [
        "de/abs_rel", "de/sq_rel", "de/rms", "de/log_rms", "da/a1", "da/a2", "da/a3"
    ]

    print("Using split:\n ", self.opt.split)
    print(
        "There are {:d} training items and {:d} validation items\n".format(
            len(train_dataset), len(val_dataset)))

    self.save_opts()
def __init__(self, options):
    """Build models, regularization tool layers, optimizer and dataloaders.

    In addition to the standard depth/pose networks, this variant sets up
    non-trainable "tool" layers (stereo masking, surface-normal and
    type/border-wise regularization) driven by semantic category ids.

    Args:
        options: parsed command-line options namespace.
    """
    self.opt = options
    self.log_path = os.path.join(self.opt.log_dir, self.opt.model_name)

    # checking height and width are multiples of 32 (encoder downsamples 5x)
    assert self.opt.height % 32 == 0, "'height' must be a multiple of 32"
    assert self.opt.width % 32 == 0, "'width' must be a multiple of 32"

    self.models = {}               # trainable sub-networks, keyed by name
    self.toolLayers = {}           # non-trainable helper layers, keyed by name
    self.parameters_to_train = []  # flat parameter list handed to the optimizer

    self.device = torch.device("cpu" if self.opt.no_cuda else "cuda")

    self.num_scales = len(self.opt.scales)
    self.num_input_frames = len(self.opt.frame_ids)
    # "pairs" feeds the pose net two frames at a time; otherwise all input frames
    self.num_pose_frames = 2 if self.opt.pose_model_input == "pairs" else self.num_input_frames

    assert self.opt.frame_ids[0] == 0, "frame_ids must start with 0"

    # a pose network is unnecessary only for pure stereo training on frame 0
    self.use_pose_net = not (self.opt.use_stereo and self.opt.frame_ids == [0])

    if self.opt.use_stereo:
        # NOTE: mutates the shared options object; "s" marks the stereo frame
        self.opt.frame_ids.append("s")

    # depth encoder + decoder
    self.models["encoder"] = networks.ResnetEncoder(
        self.opt.num_layers, self.opt.weights_init == "pretrained")
    self.models["encoder"].to(self.device)
    self.parameters_to_train += list(self.models["encoder"].parameters())

    self.models["depth"] = networks.DepthDecoder(
        self.models["encoder"].num_ch_enc, self.opt.scales)
    self.models["depth"].to(self.device)
    self.parameters_to_train += list(self.models["depth"].parameters())

    if self.use_pose_net:
        if self.opt.pose_model_type == "separate_resnet":
            # pose encoder has its own weights, separate from the depth encoder
            self.models["pose_encoder"] = networks.ResnetEncoder(
                self.opt.num_layers,
                self.opt.weights_init == "pretrained",
                num_input_images=self.num_pose_frames)
            self.models["pose_encoder"].to(self.device)
            self.parameters_to_train += list(
                self.models["pose_encoder"].parameters())
            self.models["pose"] = networks.PoseDecoder(
                self.models["pose_encoder"].num_ch_enc,
                num_input_features=1,
                num_frames_to_predict_for=2)
        elif self.opt.pose_model_type == "shared":
            # reuses the depth encoder's features
            self.models["pose"] = networks.PoseDecoder(
                self.models["encoder"].num_ch_enc, self.num_pose_frames)
        elif self.opt.pose_model_type == "posecnn":
            self.models["pose"] = networks.PoseCNN(
                self.num_input_frames if self.opt.pose_model_input == "all"
                else 2)
        self.models["pose"].to(self.device)
        self.parameters_to_train += list(self.models["pose"].parameters())

    if self.opt.predictive_mask:
        # Our implementation of the predictive masking baseline has the same architecture
        # as our depth decoder. We predict a separate mask for each source frame.
        self.models["predictive_mask"] = networks.DepthDecoder(
            self.models["encoder"].num_ch_enc, self.opt.scales,
            num_output_channels=(len(self.opt.frame_ids) - 1))
        self.models["predictive_mask"].to(self.device)
        self.parameters_to_train += list(
            self.models["predictive_mask"].parameters())

    # semantic category ids treated as foreground objects
    # (presumably Cityscapes-style trainIds — TODO confirm against the labels used)
    self.foregroundType = [5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 18]

    if self.opt.stereo_mask:
        self.toolLayers['compute_stereo_mask'] = StereoMask().cuda()

    if self.opt.typeWiseRegularization:
        self.toolLayers['compsurfnorm'] = ComputeSurfaceNormal(
            height=self.opt.height, width=self.opt.width,
            batch_size=self.opt.batch_size).cuda()
        self.toolLayers['typeWReg'] = TypeWiseRegularization().cuda()
        self.wallType = [2, 3, 4]  # Building, wall, fence
        self.roadType = [0, 1, 9]  # road, sidewalk, terrain
        self.permuType = [5, 7]  # Pole, traffic sign
        self.skyType = 10
        self.chanWinSize = 5  # window size used by the type-wise regularizer

    if self.opt.borderWiseRegularization:
        self.wallType = [2, 3, 4]  # Building, wall, fence
        self.roadType = [0, 1, 9]  # road, sidewalk, terrain
        self.toolLayers['borderWiseReg'] = BorderWiseRegularization(
            batchNum=self.opt.batch_size, width=self.opt.width,
            height=self.opt.height).cuda()

    self.model_optimizer = optim.Adam(self.parameters_to_train, self.opt.learning_rate)
    # decay the LR by 10x every scheduler_step_size epochs
    self.model_lr_scheduler = optim.lr_scheduler.StepLR(
        self.model_optimizer, self.opt.scheduler_step_size, 0.1)

    if self.opt.load_weights_folder is not None:
        self.load_model()

    print("Training model named:\n ", self.opt.model_name)
    print("Models and tensorboard events files are saved to:\n ", self.opt.log_dir)
    print("Training is using:\n ", self.device)

    # data
    datasets_dict = {
        "kitti": datasets.KITTIRAWDataset,
        "kitti_odom": datasets.KITTIOdomDataset
    }
    self.dataset = datasets_dict[self.opt.dataset]

    fpath = os.path.join(os.path.dirname(__file__), "splits", self.opt.split, "{}_files.txt")

    train_filenames = readlines(fpath.format("train"))
    val_filenames = readlines(fpath.format("val"))
    img_ext = '.png'

    num_train_samples = len(train_filenames)
    self.num_total_steps = num_train_samples // self.opt.batch_size * self.opt.num_epochs

    # 4 = number of scales the dataset pre-computes images for
    train_dataset = self.dataset(
        self.opt.data_path, train_filenames, self.opt.height, self.opt.width,
        self.opt.frame_ids, 4, is_train=True, img_ext=img_ext)
    self.train_loader = DataLoader(
        train_dataset, self.opt.batch_size, True,
        num_workers=self.opt.num_workers, pin_memory=True, drop_last=True)
    val_dataset = self.dataset(
        self.opt.data_path, val_filenames, self.opt.height, self.opt.width,
        self.opt.frame_ids, 4, is_train=False, img_ext=img_ext)
    self.val_loader = DataLoader(
        val_dataset, self.opt.batch_size, True,
        num_workers=self.opt.num_workers, pin_memory=True, drop_last=True)
    self.val_iter = iter(self.val_loader)

    # one tensorboard writer per phase
    self.writers = {}
    for mode in ["train", "val"]:
        self.writers[mode] = SummaryWriter(
            os.path.join(self.log_path, mode))

    if not self.opt.no_ssim:
        self.ssim = SSIM()
        self.ssim.to(self.device)

    # per-scale reprojection helpers
    self.backproject_depth = {}
    self.project_3d = {}
    for scale in self.opt.scales:
        h = self.opt.height // (2**scale)
        w = self.opt.width // (2**scale)

        self.backproject_depth[scale] = BackprojectDepth(
            self.opt.batch_size, h, w)
        self.backproject_depth[scale].to(self.device)

        self.project_3d[scale] = Project3D(self.opt.batch_size, h, w)
        self.project_3d[scale].to(self.device)

    self.depth_metric_names = [
        "de/abs_rel", "de/sq_rel", "de/rms", "de/log_rms", "da/a1", "da/a2", "da/a3"
    ]

    print("Using split:\n ", self.opt.split)
    print(
        "There are {:d} training items and {:d} validation items\n".format(
            len(train_dataset), len(val_dataset)))

    self.save_opts()
def __init__(self, options):
    """Build models, optimizer, dataloaders and projection helpers for training.

    Args:
        options: parsed command-line options namespace.
    """
    self.opt = options
    self.log_path = os.path.join(self.opt.log_dir, self.opt.model_name)

    # checking height and width are multiples of 32
    # default input size is 640x192
    assert self.opt.height % 32 == 0, "'height' must be a multiple of 32"
    assert self.opt.width % 32 == 0, "'width' must be a multiple of 32"

    self.models = {}               # all sub-networks, keyed by name
    self.parameters_to_train = []  # flat parameter list handed to the optimizer

    self.device = torch.device("cpu" if self.opt.no_cuda else "cuda")

    # "scales used in the loss"
    self.num_scales = len(self.opt.scales)
    # frame_ids defaults to [0, -1, 1]; the target frame has id 0
    self.num_input_frames = len(self.opt.frame_ids)
    self.num_pose_frames = 2 if self.opt.pose_model_input == "pairs" else self.num_input_frames

    assert self.opt.frame_ids[0] == 0, "frame_ids must start with 0"

    self.use_pose_net = not (self.opt.use_stereo and self.opt.frame_ids == [0])

    if self.opt.use_stereo:
        self.opt.frame_ids.append("s")

    # self.opt.num_layers is the depth of the ResNet encoder (ResNet-18 by default);
    # the encoder outputs features at 5 scales
    self.models["encoder"] = networks.ResnetEncoder(
        self.opt.num_layers, self.opt.weights_init == "pretrained")
    self.models["encoder"].to(self.device)
    self.parameters_to_train += list(self.models["encoder"].parameters())

    self.models["depth"] = networks.DepthDecoder(
        self.models["encoder"].num_ch_enc, self.opt.scales)
    self.models["depth"].to(self.device)
    self.parameters_to_train += list(self.models["depth"].parameters())

    # Three pose-network variants; the paper's Supplementary Material table
    # compares them. separate_resnet performs best and is the default choice.
    if self.use_pose_net:
        # Does not share parameters with the depth encoder.
        # The pose encoder stacks two images along the channel dimension
        # (6 channels) and outputs one feature set; the pose decoder takes
        # that feature set and outputs two poses.
        if self.opt.pose_model_type == "separate_resnet":
            self.models["pose_encoder"] = networks.ResnetEncoder(
                self.opt.num_layers,
                self.opt.weights_init == "pretrained",
                num_input_images=self.num_pose_frames)
            self.models["pose_encoder"].to(self.device)
            self.parameters_to_train += list(
                self.models["pose_encoder"].parameters())
            self.models["pose"] = networks.PoseDecoder(
                self.models["pose_encoder"].num_ch_enc,
                num_input_features=1,
                num_frames_to_predict_for=2)
        # Shares parameters with the depth encoder: each image is fed through
        # the encoder separately (Siamese-style); the decoder takes two
        # feature sets and outputs one pose.
        elif self.opt.pose_model_type == "shared":
            self.models["pose"] = networks.PoseDecoder(
                self.models["encoder"].num_ch_enc, self.num_pose_frames)
        # PoseCNN is the method proposed in "Learning Depth from Monocular
        # Videos using Direct Methods", see https://arxiv.org/pdf/1712.00175.pdf
        elif self.opt.pose_model_type == "posecnn":
            self.models["pose"] = networks.PoseCNN(
                self.num_input_frames if self.opt.pose_model_input == "all" else 2)
        self.models["pose"].to(self.device)
        self.parameters_to_train += list(self.models["pose"].parameters())

    # this mask corresponds to the explainability mask from SfMLearner
    if self.opt.predictive_mask:
        assert self.opt.disable_automasking, \
            "When using predictive_mask, please disable automasking with --disable_automasking"

        # Our implementation of the predictive masking baseline has the same architecture
        # as our depth decoder. We predict a separate mask for each source frame.
        self.models["predictive_mask"] = networks.DepthDecoder(
            self.models["encoder"].num_ch_enc, self.opt.scales,
            num_output_channels=(len(self.opt.frame_ids) - 1))
        self.models["predictive_mask"].to(self.device)
        self.parameters_to_train += list(
            self.models["predictive_mask"].parameters())

    self.model_optimizer = optim.Adam(self.parameters_to_train, self.opt.learning_rate)
    # decay the LR by 10x every scheduler_step_size epochs
    self.model_lr_scheduler = optim.lr_scheduler.StepLR(
        self.model_optimizer, self.opt.scheduler_step_size, 0.1)

    if self.opt.load_weights_folder is not None:
        self.load_model()

    print("Training model named:\n ", self.opt.model_name)
    print("Models and tensorboard events files are saved to:\n ", self.opt.log_dir)
    print("Training is using:\n ", self.device)

    # data
    datasets_dict = {
        "kitti": datasets.KITTIRAWDataset,
        "kitti_odom": datasets.KITTIOdomDataset
    }
    self.dataset = datasets_dict[self.opt.dataset]

    fpath = os.path.join(os.path.dirname(__file__), "splits", self.opt.split, "{}_files.txt")

    train_filenames = readlines(fpath.format("train"))
    val_filenames = readlines(fpath.format("val"))
    img_ext = '.png' if self.opt.png else '.jpg'

    num_train_samples = len(train_filenames)
    self.num_total_steps = num_train_samples // self.opt.batch_size * self.opt.num_epochs

    # 4 = number of scales the dataset pre-computes images for
    train_dataset = self.dataset(
        self.opt.data_path, train_filenames, self.opt.height, self.opt.width,
        self.opt.frame_ids, 4, is_train=True, img_ext=img_ext)
    self.train_loader = DataLoader(
        train_dataset, self.opt.batch_size, True,
        num_workers=self.opt.num_workers, pin_memory=True, drop_last=True)
    val_dataset = self.dataset(
        self.opt.data_path, val_filenames, self.opt.height, self.opt.width,
        self.opt.frame_ids, 4, is_train=False, img_ext=img_ext)
    self.val_loader = DataLoader(
        val_dataset, self.opt.batch_size, True,
        num_workers=self.opt.num_workers, pin_memory=True, drop_last=True)
    self.val_iter = iter(self.val_loader)

    # one tensorboard writer per phase
    self.writers = {}
    for mode in ["train", "val"]:
        self.writers[mode] = SummaryWriter(
            os.path.join(self.log_path, mode))

    # if set, disables ssim in the loss
    if not self.opt.no_ssim:
        self.ssim = SSIM()
        self.ssim.to(self.device)

    # per-scale reprojection helpers
    self.backproject_depth = {}
    self.project_3d = {}
    for scale in self.opt.scales:
        h = self.opt.height // (2**scale)
        w = self.opt.width // (2**scale)

        self.backproject_depth[scale] = BackprojectDepth(
            self.opt.batch_size, h, w)
        self.backproject_depth[scale].to(self.device)

        self.project_3d[scale] = Project3D(self.opt.batch_size, h, w)
        self.project_3d[scale].to(self.device)

    self.depth_metric_names = [
        "de/abs_rel", "de/sq_rel", "de/rms", "de/log_rms", "da/a1", "da/a2", "da/a3"
    ]

    print("Using split:\n ", self.opt.split)
    print(
        "There are {:d} training items and {:d} validation items\n".format(
            len(train_dataset), len(val_dataset)))

    # save options
    self.save_opts()
def __init__(self, options):
    """Build models, two separate optimizers (depth / pose) and dataloaders.

    This variant uses a packing encoder and PoseCNN, and trains the depth and
    pose sub-networks with independent Adam optimizers and LR schedules.

    Args:
        options: parsed command-line options namespace.
    """
    self.opt = options
    self.log_path = os.path.join(self.opt.log_dir, self.opt.model_name)

    # checking height and width are multiples of 32 (encoder downsamples 5x)
    assert self.opt.height % 32 == 0, "'height' must be a multiple of 32"
    assert self.opt.width % 32 == 0, "'width' must be a multiple of 32"

    self.models = {}                     # all sub-networks, keyed by name
    self.parameters_to_train = []        # union of all trainable parameters
    self.depth_parameters_to_train = []  # parameters for the depth optimizer
    self.pose_parameters_to_train = []   # parameters for the pose optimizer

    self.device = torch.device("cpu" if self.opt.no_cuda else "cuda")

    self.num_scales = len(self.opt.scales)
    self.num_input_frames = len(self.opt.frame_ids)
    # "pairs" feeds the pose net two frames at a time; otherwise all input frames
    self.num_pose_frames = 2 if self.opt.pose_model_input == "pairs" else self.num_input_frames

    assert self.opt.frame_ids[0] == 0, "frame_ids must start with 0"

    # depth encoder + decoder (packing/unpacking architecture)
    self.models["encoder"] = networks.PackResNetEncoder()
    self.models["encoder"].to(self.device)
    self.parameters_to_train += list(self.models["encoder"].parameters())
    self.depth_parameters_to_train += list(
        self.models["encoder"].parameters())

    self.models["depth"] = networks.UnPackDepthDecoder(
        self.models["encoder"].num_ch_enc, self.opt.scales)
    self.models["depth"].to(self.device)
    self.parameters_to_train += list(self.models["depth"].parameters())
    self.depth_parameters_to_train += list(
        self.models["depth"].parameters())

    # pose network (CNN variant only in this trainer)
    self.models["pose"] = networks.PoseCNN(
        self.num_input_frames if self.opt.pose_model_input == "all" else 2)
    self.models["pose"].to(self.device)
    self.parameters_to_train += list(self.models["pose"].parameters())
    self.pose_parameters_to_train += list(self.models["pose"].parameters())

    # separate optimizers so depth and pose can use different learning rates
    # self.model_optimizer = optim.Adam(self.parameters_to_train, self.opt.depth_learning_rate)
    self.depth_model_optimizer = optim.Adam(self.depth_parameters_to_train,
                                            self.opt.depth_learning_rate)
    self.pose_model_optimizer = optim.Adam(self.pose_parameters_to_train,
                                           self.opt.pose_learning_rate)
    # self.model_optimizer = optim.Adam(self.parameters_to_train, self.opt.learning_rate)
    # self.model_lr_scheduler = optim.lr_scheduler.StepLR(self.model_optimizer, self.opt.scheduler_step_size, 0.5)
    # decay both LRs by 10x every scheduler_step_size epochs
    self.depth_model_lr_scheduler = optim.lr_scheduler.StepLR(
        self.depth_model_optimizer, self.opt.scheduler_step_size, 0.1)
    self.pose_model_lr_scheduler = optim.lr_scheduler.StepLR(
        self.pose_model_optimizer, self.opt.scheduler_step_size, 0.1)

    if self.opt.load_weights_folder is not None:
        self.load_model()

    print("Training model named:\n ", self.opt.model_name)
    print("Models and tensorboard events files are saved to:\n ", self.opt.log_dir)
    print("Training is using:\n ", self.device)

    # data
    datasets_dict = {
        "kitti": datasets.KITTIRDVTDataset,
        "kitti_odom": datasets.KITTIOdomDataset
    }
    self.dataset = datasets_dict[self.opt.dataset]

    fpath = os.path.join(os.path.dirname(__file__), "splits", self.opt.split, "{}_files.txt")

    train_filenames = readlines(fpath.format("train"))
    val_filenames = readlines(fpath.format("val"))
    img_ext = '.png' if self.opt.png else '.jpg'

    num_train_samples = len(train_filenames)
    self.num_total_steps = num_train_samples // self.opt.batch_size * self.opt.num_epochs

    # 4 = number of scales the dataset pre-computes images for
    train_dataset = self.dataset(
        self.opt.data_path, train_filenames, self.opt.height, self.opt.width,
        self.opt.frame_ids, 4, is_train=True, img_ext=img_ext)
    self.train_loader = DataLoader(
        train_dataset, self.opt.batch_size, True,
        num_workers=self.opt.num_workers, pin_memory=True, drop_last=True)
    val_dataset = self.dataset(
        self.opt.data_path, val_filenames, self.opt.height, self.opt.width,
        self.opt.frame_ids, 4, is_train=False, img_ext=img_ext)
    self.val_loader = DataLoader(
        val_dataset, self.opt.batch_size, True,
        num_workers=self.opt.num_workers, pin_memory=True, drop_last=True)
    self.val_iter = iter(self.val_loader)

    # one tensorboard writer per phase
    self.writers = {}
    for mode in ["train", "val"]:
        self.writers[mode] = SummaryWriter(
            os.path.join(self.log_path, mode))

    if not self.opt.no_ssim:
        self.ssim = SSIM()
        self.ssim.to(self.device)

    # per-scale reprojection helpers
    self.backproject_depth = {}
    self.project_3d = {}
    for scale in self.opt.scales:
        h = self.opt.height // (2**scale)
        w = self.opt.width // (2**scale)

        self.backproject_depth[scale] = BackprojectDepth(
            self.opt.batch_size, h, w)
        self.backproject_depth[scale].to(self.device)

        self.project_3d[scale] = Project3D(self.opt.batch_size, h, w)
        self.project_3d[scale].to(self.device)

    self.depth_metric_names = [
        "de/abs_rel", "de/sq_rel", "de/rms", "de/log_rms", "da/a1", "da/a2", "da/a3"
    ]

    print("Using split:\n ", self.opt.split)
    print(
        "There are {:d} training items and {:d} validation items\n".format(
            len(train_dataset), len(val_dataset)))

    self.save_opts()
def __init__(self, options):
    """Build models, optimizer, NYU data loaders, and logging for training.

    Args:
        options: parsed command-line options namespace; this constructor
            reads image size, scales, frame ids, dataset/segment paths,
            learning-rate and logging settings from it.
    """
    self.opt = options
    self.debug = self.opt.debug
    print('DEBUG: ', self.debug)
    self.log_path = os.path.join(self.opt.log_dir, self.opt.model_name)

    # checking height and width are multiples of 32
    assert self.opt.height % 32 == 0, "'height' must be a multiple of 32"
    assert self.opt.width % 32 == 0, "'width' must be a multiple of 32"

    self.models = {}
    self.parameters_to_train = []

    self.device = torch.device("cpu" if self.opt.no_cuda else "cuda")

    self.num_scales = len(self.opt.scales)
    self.num_input_frames = len(self.opt.frame_ids)
    # Pose network consumes either image pairs or all input frames at once.
    self.num_pose_frames = 2 if self.opt.pose_model_input == "pairs" else self.num_input_frames

    assert self.opt.frame_ids[0] == 0, "frame_ids must start with 0"

    # Unlike the stereo-capable variants of this trainer, the pose network
    # is always trained here (no stereo-only shortcut).
    self.use_pose_net = True

    # Depth network: ResNet encoder + multi-scale depth decoder.
    self.models["encoder"] = networks.ResnetEncoder(
        self.opt.num_layers, self.opt.weights_init == "pretrained")
    self.models["encoder"].to(self.device)
    self.parameters_to_train += list(self.models["encoder"].parameters())

    self.models["depth"] = networks.DepthDecoder(
        self.models["encoder"].num_ch_enc, self.opt.scales)
    self.models["depth"].to(self.device)
    self.parameters_to_train += list(self.models["depth"].parameters())

    if self.use_pose_net:
        if self.opt.pose_model_type == "separate_resnet":
            # Pose gets its own encoder, fed with the stacked input frames.
            self.models["pose_encoder"] = networks.ResnetEncoder(
                self.opt.num_layers,
                self.opt.weights_init == "pretrained",
                num_input_images=self.num_pose_frames)
            self.models["pose_encoder"].to(self.device)
            self.parameters_to_train += list(
                self.models["pose_encoder"].parameters())
            self.models["pose"] = networks.PoseDecoder(
                self.models["pose_encoder"].num_ch_enc,
                num_input_features=1,
                num_frames_to_predict_for=2)
        elif self.opt.pose_model_type == "shared":
            # Reuse the depth encoder's features for pose estimation.
            self.models["pose"] = networks.PoseDecoder(
                self.models["encoder"].num_ch_enc, self.num_pose_frames)
        elif self.opt.pose_model_type == "posecnn":
            self.models["pose"] = networks.PoseCNN(
                self.num_input_frames if self.opt.pose_model_input == "all" else 2)
        self.models["pose"].to(self.device)
        self.parameters_to_train += list(self.models["pose"].parameters())

    # One optimizer over all trainable sub-networks; MultiStepLR multiplies
    # the learning rate by 0.1 at the configured milestones.
    self.model_optimizer = optim.Adam(self.parameters_to_train, self.opt.learning_rate)
    self.model_lr_scheduler = optim.lr_scheduler.MultiStepLR(
        self.model_optimizer, self.opt.scheduler_step_size, 0.1)

    if self.opt.load_weights_folder is not None:
        self.load_model()

    print("Training model named:\n ", self.opt.model_name)
    print("Models and tensorboard events files are saved to:\n ", self.opt.log_dir)
    print("Training is using:\n ", self.device)
    print("Training is using frames: \n ", self.opt.frame_ids_to_train)

    # data
    datasets_dict = {"nyu": datasets.NYUDataset}
    self.dataset = datasets_dict[self.opt.dataset]

    # Training split is a fixed NYU file list (frame offsets 0/10/20/30/40).
    train_filenames = readlines('./splits/nyu_train_0_10_20_30_40.txt')
    num_train_samples = len(train_filenames)
    self.num_total_steps = num_train_samples // self.opt.batch_size * self.opt.num_epochs

    # NOTE(review): `shared_dict` is not defined in this method — presumably
    # a module-level cache shared with dataset workers; confirm it exists at
    # module scope, otherwise this line raises NameError.
    train_dataset = self.dataset(self.opt.data_path, train_filenames,
                                 self.opt.height, self.opt.width,
                                 self.opt.frame_ids, 1, is_train=True,
                                 segment_path=self.opt.segment_path,
                                 return_segment=True,
                                 shared_dict=shared_dict)
    self.train_loader = DataLoader(train_dataset, self.opt.batch_size, True,
                                   num_workers=self.opt.num_workers,
                                   pin_memory=True, drop_last=True)

    # validation
    filenames = readlines('./splits/nyu_test.txt')
    # filenames = [filename.replace("/p300/Code/self_depth/monodepth2/nyuv2/nyu_official",
    #                               self.opt.val_path) for filename in filenames]
    val_dataset = datasets.NYUDataset(self.opt.val_path, filenames,
                                      self.opt.height, self.opt.width,
                                      [0], 1, is_train=False,
                                      return_segment=False)
    # Validation runs one image at a time, unshuffled.
    self.val_dataloader = DataLoader(val_dataset, 1, shuffle=False, num_workers=2)

    self.writers = {}
    for mode in ["train", "val"]:
        self.writers[mode] = SummaryWriter(
            os.path.join(self.log_path, mode))

    # Sparse SSIM variant used by this trainer's photometric loss.
    self.ssim_sparse = SSIM_sparse()
    self.ssim_sparse.to(self.device)

    # One backprojection module per scale (depth map -> 3D point cloud).
    self.backproject_depth = {}
    for scale in self.opt.scales:
        h = self.opt.height // (2**scale)
        w = self.opt.width // (2**scale)
        self.backproject_depth[scale] = BackprojectDepth(
            self.opt.batch_size, h, w)
        self.backproject_depth[scale].to(self.device)

    self.depth_metric_names = [
        "de/abs_rel", "de/sq_rel", "de/rms", "de/log_rms", "da/a1", "da/a2", "da/a3"
    ]

    print("Using split:\n ", self.opt.split)
    # -1: validation item count is intentionally not reported here.
    print(
        "There are {:d} training items and {:d} validation items\n".format(
            len(train_dataset), -1))

    self.save_opts()
def __init__(self, options):
    """Build models, optimizer, data loaders, and logging for training on
    KITTI or on the FLIR/KAIST thermal datasets.

    Args:
        options: parsed command-line options namespace; image size, scales,
            frame ids, dataset selection, paths, and optimizer settings are
            read from it throughout this constructor.
    """
    self.opt = options
    self.log_path = os.path.join(self.opt.log_dir, self.opt.model_name)

    # checking height and width are multiples of 32
    assert self.opt.height % 32 == 0, "'height' must be a multiple of 32"
    assert self.opt.width % 32 == 0, "'width' must be a multiple of 32"

    self.models = {}
    self.parameters_to_train = []

    self.device = torch.device("cpu" if self.opt.no_cuda else "cuda")

    self.num_scales = len(self.opt.scales)
    self.num_input_frames = len(self.opt.frame_ids)
    # Pose network consumes either image pairs or all input frames at once.
    self.num_pose_frames = 2 if self.opt.pose_model_input == "pairs" else self.num_input_frames

    assert self.opt.frame_ids[0] == 0, "frame_ids must start with 0"

    # With stereo-only training ([0] plus the stereo frame) no pose net is needed.
    self.use_pose_net = not (self.opt.use_stereo and self.opt.frame_ids == [0])

    if self.opt.use_stereo:
        self.opt.frame_ids.append("s")

    # Depth network: ResNet encoder + multi-scale depth decoder.
    self.models["encoder"] = networks.ResnetEncoder(
        self.opt.num_layers, self.opt.weights_init == "pretrained")
    self.models["encoder"].to(self.device)
    self.parameters_to_train += list(self.models["encoder"].parameters())

    self.models["depth"] = networks.DepthDecoder(
        self.models["encoder"].num_ch_enc, self.opt.scales)
    self.models["depth"].to(self.device)
    self.parameters_to_train += list(self.models["depth"].parameters())

    if self.use_pose_net:
        if self.opt.pose_model_type == "separate_resnet":
            # Pose gets its own encoder, fed with the stacked input frames.
            self.models["pose_encoder"] = networks.ResnetEncoder(
                self.opt.num_layers,
                self.opt.weights_init == "pretrained",
                num_input_images=self.num_pose_frames)
            self.models["pose_encoder"].to(self.device)
            self.parameters_to_train += list(self.models["pose_encoder"].parameters())
            self.models["pose"] = networks.PoseDecoder(
                self.models["pose_encoder"].num_ch_enc,
                num_input_features=1,
                num_frames_to_predict_for=2)
        elif self.opt.pose_model_type == "shared":
            # Reuse the depth encoder's features for pose estimation.
            self.models["pose"] = networks.PoseDecoder(
                self.models["encoder"].num_ch_enc, self.num_pose_frames)
        elif self.opt.pose_model_type == "posecnn":
            self.models["pose"] = networks.PoseCNN(
                self.num_input_frames if self.opt.pose_model_input == "all" else 2)
        self.models["pose"].to(self.device)
        self.parameters_to_train += list(self.models["pose"].parameters())

    if self.opt.predictive_mask:
        # Consistency fix: the other trainer variants in this file enforce
        # that predictive_mask and automasking are mutually exclusive; this
        # variant was silently missing the guard.
        assert self.opt.disable_automasking, \
            "When using predictive_mask, please disable automasking with --disable_automasking"
        # Our implementation of the predictive masking baseline has the same architecture
        # as our depth decoder. We predict a separate mask for each source frame.
        self.models["predictive_mask"] = networks.DepthDecoder(
            self.models["encoder"].num_ch_enc, self.opt.scales,
            num_output_channels=(len(self.opt.frame_ids) - 1))
        self.models["predictive_mask"].to(self.device)
        self.parameters_to_train += list(self.models["predictive_mask"].parameters())

    # One optimizer over all trainable sub-networks; StepLR multiplies the
    # learning rate by 0.1 every scheduler_step_size epochs.
    self.model_optimizer = optim.Adam(self.parameters_to_train, self.opt.learning_rate)
    self.model_lr_scheduler = optim.lr_scheduler.StepLR(
        self.model_optimizer, self.opt.scheduler_step_size, 0.1)

    if self.opt.load_weights_folder is not None:
        self.load_model()

    print("Training model named:\n ", self.opt.model_name)
    print("Models and tensorboard events files are saved to:\n ", self.opt.log_dir)
    print("Training is using:\n ", self.device)

    # data
    datasets_dict = {'kitti': KITTIRAWDataset,
                     'kitti_odom': KITTIOdomDataset,
                     'FLIR': FlirDataset,
                     'KAIST': KAIST_Dataset}
    self.dataset = datasets_dict[self.opt.dataset]

    def list_frames(base_dir, rel):
        # Sorted frame paths under base_dir/rel, dropping the first and last
        # frames so every remaining sample has both temporal neighbours.
        # Preserves the original path construction: os.path.join keeps the
        # trailing '/' of `rel`, and the filename is string-concatenated.
        files = os.listdir(os.path.join(base_dir, rel))
        files.sort()
        return [os.path.join(base_dir, rel) + f for f in files[1:-1]]

    thermal = False
    if self.opt.dataset == 'FLIR':
        # FLIR: train = 'train' + 'video' sequences, val = 'valid'.
        train_filenames = list_frames(self.opt.data_path, 'train/PreviewData/')
        train_filenames += list_frames(self.opt.data_path, 'video/PreviewData/')
        val_filenames = list_frames(self.opt.data_path, 'valid/PreviewData/')
        thermal = True
    elif self.opt.dataset == 'KAIST':
        # KAIST: three scene categories under training/ and testing/.
        train_files = os.path.join(self.opt.data_path, 'training')
        train_filenames = []
        for scene in ('Campus/THERMAL/', 'Residential/THERMAL/', 'Urban/THERMAL/'):
            train_filenames += list_frames(train_files, scene)
        val_files = os.path.join(self.opt.data_path, 'testing')
        val_filenames = []
        for scene in ('Campus/THERMAL/', 'Residential/THERMAL/', 'Urban/THERMAL/'):
            val_filenames += list_frames(val_files, scene)
        thermal = True
    else:
        # KITTI-style datasets read their splits from text files.
        fpath = os.path.join(os.path.dirname(__file__), "splits",
                             self.opt.split, "{}_files.txt")
        train_filenames = readlines(fpath.format("train"))
        val_filenames = readlines(fpath.format("val"))

    assert (self.opt.img_ext == '.png') or (self.opt.img_ext == '.jpg') or \
        (self.opt.img_ext == '.jpeg'), "Please provide a correct image extension"
    img_ext = self.opt.img_ext

    num_train_samples = len(train_filenames)
    self.num_total_steps = num_train_samples // self.opt.batch_size * self.opt.num_epochs

    train_dataset = self.dataset(
        self.opt.data_path, train_filenames, self.opt.height, self.opt.width,
        self.opt.frame_ids, 4, is_train=True, img_ext=img_ext, thermal=thermal)
    self.train_loader = DataLoader(
        train_dataset, self.opt.batch_size, True,
        num_workers=self.opt.num_workers, pin_memory=True, drop_last=True)
    val_dataset = self.dataset(
        self.opt.data_path, val_filenames, self.opt.height, self.opt.width,
        self.opt.frame_ids, 4, is_train=False, img_ext=img_ext, thermal=thermal)
    self.val_loader = DataLoader(
        val_dataset, self.opt.batch_size, True,
        num_workers=self.opt.num_workers, pin_memory=True, drop_last=True)
    self.val_iter = iter(self.val_loader)

    # self.writers = {}
    # for mode in ["train", "val"]:
    #     self.writers[mode] = SummaryWriter(os.path.join(self.log_path, mode))

    if not self.opt.no_ssim:
        self.ssim = SSIM()
        self.ssim.to(self.device)

    # Per-scale modules for depth -> point cloud and point cloud -> image.
    self.backproject_depth = {}
    self.project_3d = {}
    for scale in self.opt.scales:
        h = self.opt.height // (2 ** scale)
        w = self.opt.width // (2 ** scale)
        self.backproject_depth[scale] = BackprojectDepth(self.opt.batch_size, h, w)
        self.backproject_depth[scale].to(self.device)
        self.project_3d[scale] = Project3D(self.opt.batch_size, h, w)
        self.project_3d[scale].to(self.device)

    self.depth_metric_names = [
        "de/abs_rel", "de/sq_rel", "de/rms", "de/log_rms", "da/a1", "da/a2", "da/a3"]

    if self.opt.dataset.startswith('kitti'):
        print("Using split:\n ", self.opt.split)
    else:
        print("Using dataset:\n ", self.opt.dataset)
    print("There are {:d} training items and {:d} validation items\n".format(
        len(train_dataset), len(val_dataset)))

    self.save_opts()
def __init__(self, options):
    """Build models (with optional attention modules), optimizer, KITTI data
    loaders, and logging for training.

    Args:
        options: parsed command-line options namespace; image size, scales,
            frame ids, attention flags (BA2M/CBAM/BAM), dataset paths, and
            optimizer settings are read from it throughout this constructor.
    """
    self.opt = options
    self.log_path = os.path.join(self.opt.log_dir, self.opt.model_name)

    # checking height and width are multiples of 32
    assert self.opt.height % 32 == 0, "'height' must be a multiple of 32"
    assert self.opt.width % 32 == 0, "'width' must be a multiple of 32"

    self.models = {}
    self.parameters_to_train = []

    # Select the device; must happen before any data/model is moved with .to().
    self.device = torch.device("cpu" if self.opt.no_cuda else "cuda")

    self.num_scales = len(self.opt.scales)
    self.num_input_frames = len(self.opt.frame_ids)
    # Number of frames the pose network sees per forward pass.
    self.num_pose_frames = 2 if self.opt.pose_model_input == "pairs" else self.num_input_frames

    # Raise if the frame ids do not start at 0.
    assert self.opt.frame_ids[0] == 0, "frame_ids must start with 0"

    # use_stereo with frame_ids == [0] means stereo-only training (no
    # temporal frames), in which case no pose network is needed.
    self.use_pose_net = not (
        self.opt.use_stereo and self.opt.frame_ids == [0]
    )

    if self.opt.use_stereo:
        # "s" appended last marks the stereo counterpart frame.
        self.opt.frame_ids.append("s")

    # Network setup: encoder + decoder. The extra flags select optional
    # attention modules (BA2M / CBAM / BAM) inside the encoder.
    self.models["encoder"] = networks.ResnetEncoder(
        self.opt.num_layers, self.opt.weights_init == "pretrained",
        self.opt.BA2M, self.opt.CBAM, self.opt.BAM)
    self.models["encoder"].to(self.device)  # must precede data loading/training
    self.parameters_to_train += list(
        self.models["encoder"].parameters())  # collect trainable parameters

    self.models["depth"] = networks.DepthDecoder(
        self.models["encoder"].num_ch_enc, self.opt.scales)
    self.models["depth"].to(self.device)
    self.parameters_to_train += list(
        self.models["depth"].parameters())  # collect trainable parameters

    # Pose network setup.
    if self.use_pose_net:
        # "separate_resnet" gives pose its own encoder rather than sharing depth's.
        if self.opt.pose_model_type == "separate_resnet":
            self.models["pose_encoder"] = networks.ResnetEncoder(
                self.opt.num_layers, self.opt.weights_init == "pretrained",
                self.opt.BA2M, self.opt.CBAM, self.opt.BAM,
                num_input_images=self.num_pose_frames)
            self.models["pose_encoder"].to(self.device)
            self.parameters_to_train += list(
                self.models["pose_encoder"].parameters())  # collect trainable parameters
            self.models["pose"] = networks.PoseDecoder(
                self.models["pose_encoder"].num_ch_enc,
                num_input_features=1,
                num_frames_to_predict_for=2)
        elif self.opt.pose_model_type == "shared":
            self.models["pose"] = networks.PoseDecoder(
                self.models["encoder"].num_ch_enc, self.num_pose_frames)
        elif self.opt.pose_model_type == "posecnn":
            self.models["pose"] = networks.PoseCNN(
                self.num_input_frames if self.opt.pose_model_input == "all" else 2)
        # The pose decoder's parameters are collected here, after all branches.
        self.models["pose"].to(self.device)
        self.parameters_to_train += list(self.models["pose"].parameters())

    # Optional predictive-mask baseline (mutually exclusive with automasking).
    if self.opt.predictive_mask:
        assert self.opt.disable_automasking, \
            "When using predictive_mask, please disable automasking with --disable_automasking"

        # Our implementation of the predictive masking baseline has the the same architecture
        # as our depth decoder. We predict a separate mask for each source frame.
        self.models["predictive_mask"] = networks.DepthDecoder(
            self.models["encoder"].num_ch_enc, self.opt.scales,
            num_output_channels=(len(self.opt.frame_ids) - 1))
        self.models["predictive_mask"].to(self.device)
        self.parameters_to_train += list(
            self.models["predictive_mask"].parameters())

    # Optimizer over all collected parameters, with a decaying learning rate.
    self.model_optimizer = optim.Adam(self.parameters_to_train,
                                      self.opt.learning_rate)
    self.model_lr_scheduler = optim.lr_scheduler.StepLR(
        self.model_optimizer, self.opt.scheduler_step_size, 0.1
    )  # new_lr = 0.1 * lr, applied every scheduler_step_size epochs

    # Resume from a checkpoint folder when one is given.
    if self.opt.load_weights_folder is not None:
        self.load_model()

    print("Training model named:\n ", self.opt.model_name)
    print("Models and tensorboard events files are saved to:\n ", self.opt.log_dir)
    print("Training is using:\n ", self.device)

    # data
    datasets_dict = {
        "kitti": datasets.KITTIRAWDataset,
        "kitti_odom": datasets.KITTIOdomDataset
    }
    self.dataset = datasets_dict[self.opt.dataset]  # e.g. KITTIRAWDataset

    # Split files live next to this script under splits/<split>/{train,val}_files.txt.
    fpath = os.path.join(os.path.dirname(__file__), "splits",
                         self.opt.split, "{}_files.txt")

    train_filenames = readlines(
        fpath.format("train"))  # .format fills the {} placeholder in fpath
    val_filenames = readlines(fpath.format("val"))
    img_ext = '.png' if self.opt.png else '.jpg'

    # Total training samples, and the total number of optimizer steps
    # (parameters update once per batch).
    num_train_samples = len(train_filenames)
    self.num_total_steps = num_train_samples // self.opt.batch_size * self.opt.num_epochs

    train_dataset = self.dataset(
        self.opt.data_path, train_filenames, self.opt.height, self.opt.width,
        self.opt.frame_ids, 4, is_train=True, img_ext=img_ext)
    # pin_memory=True uses page-locked host memory for faster GPU transfer.
    self.train_loader = DataLoader(
        train_dataset, self.opt.batch_size, True,
        num_workers=self.opt.num_workers, pin_memory=True, drop_last=True)
    val_dataset = self.dataset(self.opt.data_path, val_filenames,
                               self.opt.height, self.opt.width,
                               self.opt.frame_ids, 4, is_train=False,
                               img_ext=img_ext)
    self.val_loader = DataLoader(
        val_dataset, self.opt.batch_size, True,
        num_workers=self.opt.num_workers, pin_memory=True, drop_last=True)
    # Manual iterator so validation batches can be pulled mid-training.
    self.val_iter = iter(self.val_loader)

    self.writers = {}
    for mode in ["train", "val"]:
        self.writers[mode] = SummaryWriter(
            os.path.join(self.log_path, mode))

    if not self.opt.no_ssim:
        self.ssim = SSIM()  # SSIM is defined in the layers module
        self.ssim.to(self.device)

    # Per-scale modules: BackprojectDepth lifts a depth map to a 3D point
    # cloud; Project3D reprojects it into a camera view.
    self.backproject_depth = {}
    self.project_3d = {}
    for scale in self.opt.scales:
        h = self.opt.height // (2**scale)
        w = self.opt.width // (2**scale)
        self.backproject_depth[scale] = BackprojectDepth(
            self.opt.batch_size, h, w)
        self.backproject_depth[scale].to(self.device)
        self.project_3d[scale] = Project3D(self.opt.batch_size, h, w)
        self.project_3d[scale].to(self.device)

    self.depth_metric_names = [
        "de/abs_rel", "de/sq_rel", "de/rms", "de/log_rms", "da/a1", "da/a2", "da/a3"
    ]

    print("Using split:\n ", self.opt.split)
    print(
        "There are {:d} training items and {:d} validation items\n".format(
            len(train_dataset), len(val_dataset)))

    self.save_opts()  # persist the options used for this run
def __init__(self, options):
    """Build models, optimizer, data loaders, and logging for training,
    supporting multi-GPU operation and a cubemap projection mode.

    Args:
        options: parsed command-line options namespace; projection mode,
            image size, scales, frame ids, dataset CSVs, and optimizer
            settings are read from it throughout this constructor.
    """
    self.opt = options
    self.log_path = os.path.join(self.opt.log_dir, self.opt.model_name)
    Path(self.log_path).mkdir(exist_ok=True, parents=True)
    # Record the exact command line used for this run.
    (Path(self.log_path) / "command").open('w+').write(" ".join(sys.argv))

    # checking height and width are multiples of 32
    # assert self.opt.height % 32 == 0, "'height' must be a multiple of 32"
    # assert self.opt.width % 32 == 0, "'width' must be a multiple of 32"

    self.models = {}
    self.parameters_to_train = []

    self.device = torch.device("cpu" if self.opt.no_cuda else "cuda")
    # Use data parallelism when CUDA is enabled and more than one GPU exists.
    self.parallel = not self.opt.no_cuda and torch.cuda.device_count() > 1
    if self.parallel and self.opt.mode is Mode.Cubemap:
        assert self.opt.batch_size % torch.cuda.device_count() == 0, f"Cubemap batch size ({self.opt.batch_size})" \
                                                                     f" must be evenly divisible by the number of" \
                                                                     f" GPUs ({torch.cuda.device_count()})"

    self.num_scales = len(self.opt.scales)
    self.num_input_frames = len(self.opt.frame_ids)
    # Pose network consumes either image pairs or all input frames at once.
    self.num_pose_frames = 2 if self.opt.pose_model_input == "pairs" else self.num_input_frames

    assert self.opt.frame_ids[0] == 0, "frame_ids must start with 0"

    # With stereo-only training ([0] plus the stereo frame) no pose net is needed.
    self.use_pose_net = not (self.opt.use_stereo and self.opt.frame_ids == [0])

    if self.opt.use_stereo:
        self.opt.frame_ids.append("s")

    # Mode-specific convolution layer, data transform, and camera intrinsics.
    conv_layer, data_lambda, intrinsics = get_params(options)
    self.intrinsics = intrinsics
    # Explicit height/width options override the intrinsics' defaults.
    self.height = self.opt.height or self.intrinsics.height
    self.width = self.opt.width or self.intrinsics.width

    # Depth network: encoder + decoder built on the mode's conv layer.
    # store_model() registers the model's parameters for training.
    self.models["encoder"] = networks.ResnetEncoder(
        conv_layer, self.opt.num_layers, self.opt.weights_init == "pretrained")
    self.store_model("encoder")

    self.models["depth"] = networks.DepthDecoder(
        conv_layer, self.get_num_ch_enc(self.models["encoder"]), self.opt.scales)
    self.store_model("depth")

    if self.use_pose_net:  # true
        if self.opt.pose_model_type == "separate_resnet":  # true
            # Pose gets its own encoder, fed with the stacked input frames.
            self.models["pose_encoder"] = networks.ResnetEncoder(
                conv_layer,
                self.opt.num_layers,
                self.opt.weights_init == "pretrained",
                num_input_images=self.num_pose_frames)
            self.store_model("pose_encoder")
            self.models["pose"] = networks.PoseDecoder(
                conv_layer,
                self.get_num_ch_enc(self.models["pose_encoder"]),
                num_input_features=1,
                num_frames_to_predict_for=2)
        elif self.opt.pose_model_type == "shared":
            # Reuse the depth encoder's features for pose estimation.
            self.models["pose"] = networks.PoseDecoder(
                conv_layer, self.get_num_ch_enc(self.models["encoder"]),
                self.num_pose_frames)
        elif self.opt.pose_model_type == "posecnn":
            self.models["pose"] = networks.PoseCNN(
                conv_layer,
                self.num_input_frames if self.opt.pose_model_input == "all" else 2)
        self.store_model("pose")

    if self.opt.predictive_mask:  # false
        assert self.opt.disable_automasking, \
            "When using predictive_mask, please disable automasking with --disable_automasking"

        # Our implementation of the predictive masking baseline has the the same architecture
        # as our depth decoder. We predict a separate mask for each source frame.
        self.models["predictive_mask"] = networks.DepthDecoder(
            conv_layer, self.get_num_ch_enc(self.models["encoder"]), self.opt.scales,
            num_output_channels=(len(self.opt.frame_ids) - 1))
        self.store_model("predictive_mask")

    # One optimizer over all stored sub-networks; StepLR multiplies the
    # learning rate by 0.1 every scheduler_step_size epochs.
    self.model_optimizer = optim.Adam(self.parameters_to_train, self.opt.learning_rate)
    self.model_lr_scheduler = optim.lr_scheduler.StepLR(
        self.model_optimizer, self.opt.scheduler_step_size, 0.1)

    if self.opt.load_weights_folder is not None:
        self.load_model()

    print("Training model named:\n ", self.opt.model_name)
    print("Models and tensorboard events files are saved to:\n ", self.opt.log_dir)
    print(
        "Training is using:\n ",
        f"{self.device}" +
        (f" on {torch.cuda.device_count()} GPUs" if self.parallel else ""))

    # NOTE(review): the * 1000 suggests each CSV row expands to 1000 samples
    # — confirm against the dataset implementation.
    num_train_samples = len(load_csv(options.train_data)) * 1000
    self.num_total_steps = num_train_samples // self.opt.batch_size * self.opt.num_epochs

    train_dataset, val_dataset = get_datasets(options, data_lambda, intrinsics)

    self.train_loader = DataLoader(train_dataset, self.opt.batch_size, True,
                                   num_workers=self.opt.num_workers,
                                   pin_memory=True, drop_last=True)
    self.val_loader = DataLoader(val_dataset,
                                 self.opt.batch_size, True,
                                 num_workers=self.opt.num_workers,
                                 pin_memory=True, drop_last=True)
    # Manual iterator so validation batches can be pulled mid-training.
    self.val_iter = iter(self.val_loader)

    self.writers = {}
    for mode in ["train", "val"]:
        self.writers[mode] = SummaryWriter(
            os.path.join(self.log_path, mode))

    if not self.opt.no_ssim:
        self.ssim = self.wrap_model(SSIM())  # TODO can I parallelize?
        self.ssim.to(self.device)

    # Per-scale modules for depth -> point cloud and point cloud -> image;
    # both are mode-aware (perspective vs. cubemap).
    self.backproject_depth = {}
    self.project_3d = {}
    for scale in self.opt.scales:
        h = self.height // (2**scale)
        w = self.width // (2**scale)
        # TODO should be able to paralalize
        self.backproject_depth[scale] = BackprojectDepth(
            self.opt.batch_size, h, w, options.mode)
        self.backproject_depth[scale].to(self.device)
        self.project_3d[scale] = Project3D(self.opt.batch_size, h, w, options.mode)
        self.project_3d[scale].to(self.device)

    # Cubemap mode needs an extra module combining face poses and their loss.
    if options.mode is Mode.Cubemap:
        self.models["cube_pose_and_loss"] = self.wrap_model(
            CubePosesAndLoss())
        self.models["cube_pose_and_loss"].to(self.device)

    self.depth_metric_names = [
        "de/abs_rel", "de/sq_rel", "de/rms", "de/log_rms", "da/a1", "da/a2", "da/a3"
    ]

    self.train_items = len(train_dataset)
    self.val_items = len(val_dataset)
    print("Using split:\n ", self.opt.split)
    print(
        "There are {:d} training items and {:d} validation items\n".format(
            self.train_items, self.val_items))

    self.save_opts()