def load_loss(param: Parameters) -> AbstractLoss:
    """Instantiate the loss class named by ``param.loss_function``.

    The loss class is looked up on the ``Losses`` module by name, and its
    constructor is fed only those entries of the parameter dictionary whose
    keys match its declared parameters.

    :param param: experiment parameters; ``loss_function`` selects the class.
    :return: a ready-to-use loss instance.
    """
    import inspect

    loss_class = getattr(Losses, param.loss_function)
    # Fix: the original used loss_class.__init__.__code__.co_varnames, which
    # lists ALL local variables of __init__, not just its parameters — a local
    # that happens to share a name with an experiment parameter would be
    # passed as a (bogus) keyword argument. inspect.signature reports only
    # the real constructor parameters.
    params_dict = param.get_params_as_dict()
    arguments = {
        name: params_dict[name]
        for name in inspect.signature(loss_class.__init__).parameters
        if name in params_dict
    }
    CometLogger.print("Using loss: {}".format(param.loss_function))
    return loss_class(**arguments)
def _log_compounded_metrics(self, ATEs, REs, drift_errors):
    """Aggregate per-trajectory errors into compound metrics and push them to Comet.

    :param ATEs: per-trajectory absolute trajectory errors.
    :param REs: per-trajectory relative errors.
    :param drift_errors: per-trajectory translation/rotation drift errors.
    """
    metric_logger = MetricLogger()
    experiment = CometLogger.get_experiment()

    # Compound drift: average translation (%) and rotation (deg/m) errors.
    compound_drift_errors = CompoundTranslationRotationDrift(self.model_name, drift_errors)
    metric_logger.log(compound_drift_errors)
    for drift_key in ("avg_translation_error_percent", "avg_rotation_error_degrees_per_meter"):
        experiment.log_metric(drift_key, compound_drift_errors.metrics[drift_key])

    # Compound ATE: translation and rotation RMSE over all trajectories.
    compound_ATE = CompoundAbsoluteTrajectoryError(self.model_name, ATEs)
    metric_logger.log(compound_ATE)
    ate_stats = compound_ATE.metrics["absolute_trajectory_error"]
    experiment.log_metric("ATE_trans_RMSE", ate_stats['ATE_trans_stats']["rmse"])
    experiment.log_metric("ATE_rot_degrees_RMSE", ate_stats['ATE_rot_stats']["rmse"])

    # Compound relative error is logged through the metric logger only.
    compound_RE = CompoundRelativeError(self.model_name, REs)
    metric_logger.log(compound_RE)
def handle_timeout(self, signum, frame):
    """Signal handler fired when the watchdog timer elapses.

    Captures the stack at the interruption point, logs it, and raises
    ``TimeoutException`` so the timed-out block is aborted.

    :param signum: signal number delivered by the OS (unused beyond the signature).
    :param frame: execution frame at the moment the signal arrived.
    :raises TimeoutException: always, carrying the captured traceback.
    """
    self.state = BaseTimeout.TIMED_OUT
    self.traceback = traceback.format_stack(frame)
    # Fix: removed the dead `d` dict (frame globals/locals snapshot) that was
    # built but never read — a leftover from interactive debugging.
    trace_text = ''.join(self.traceback)
    message = "Timeout Signal received.\nTraceback:\n"
    message += trace_text
    CometLogger.print(message)
    exception_message = 'Block exceeded maximum timeout value (%d seconds). \nTraceback:' % self.seconds
    exception_message += trace_text
    raise TimeoutException(exception_message)
def _log_matrix_poses(self, poses, poses_gt, dataset_name: str, trajectory_name: str):
    """
    Logs the pose in text format where the angle is a rotation matrix:
            T00 T01 T02 T03 T10 T11 T12 T13 T20 T21 T22 T23
            0 0 0 1
            T00 T01 T02 T03 T10 T11 T12 T13 T20 T21 T22 T23

    :param poses: predicted poses, columns 0-2 rotation, 3-5 translation.
    :param poses_gt: ground-truth poses in the same layout.
    :param dataset_name: dataset the trajectory belongs to.
    :param trajectory_name: trajectory identifier.
    """
    matrices = Geometry.poses_to_transformations_matrix(
        poses[:, 3:], poses[:, :3])
    matrices_gt = Geometry.poses_to_transformations_matrix(
        poses_gt[:, 3:], poses_gt[:, :3])

    def _flatten(matrix):
        # First three rows of the 4x4 transform, row-major, space separated
        # (the constant "0 0 0 1" row is omitted, as before).
        return " ".join(f"{matrix[row][col]}" for row in range(3) for col in range(4))

    # Fix: build the rows with str.join instead of repeated string
    # concatenation, which was quadratic in the number of poses.
    pose_output = "\n".join(_flatten(matrices[i]) for i in range(len(poses)))
    pose_gt_output = "\n".join(_flatten(matrices_gt[i]) for i in range(len(poses)))

    metadata = dict()
    metadata["title"] = "pose_output_matrix"
    metadata["dataset"] = dataset_name
    metadata["trajectory"] = trajectory_name
    metadata["model"] = self.model_name
    filename = f'{metadata["title"]}_{dataset_name}_{trajectory_name}_{metadata["model"]}.txt'
    CometLogger.get_experiment().log_asset_data(pose_output, name=filename, metadata=metadata)

    metadata["title"] = "pose_gt_output_matrix"
    filename = f'{metadata["title"]}_{dataset_name}_{trajectory_name}_{metadata["model"]}.txt'
    CometLogger.get_experiment().log_asset_data(pose_gt_output, name=filename, metadata=metadata)
def _load_image_sequence(self, segment: AbstractSegment) -> torch.Tensor:
    """Return the stacked image tensor for ``segment``, via an on-disk cache.

    Tries to load a cached tensor first (with a short watchdog); on any
    failure it rebuilds the tensor from the raw dataset images, then
    best-effort caches the result when enough disk space is available.
    Every I/O step is wrapped in a ThreadingTimeout watchdog.

    :param segment: segment whose images to load; its hash keys the cache file.
    :return: tensor of shape (num_images, C, H, W) — presumably; confirm
        against the transformer's output.
    """
    cache_directory = self.dataset_directory + "/segment_image_tensor_cache"
    self._create_cache_dir(cache_directory)
    try:
        # Fast path: load the pre-built tensor from the cache.
        with ThreadingTimeout(2.0) as timeout_ctx1:
            images = torch.load("{}/{}.pkl".format(cache_directory, segment.__hash__()))
        if not bool(timeout_ctx1):
            CometLogger.print('Took too long when loading a cache image. '
                              'We will load the image directly form the dataset instead.')
            # Deliberately raised to fall through to the cache-miss path below.
            raise Exception()
    except:  # noqa: E722 — intentional: ANY cache failure falls back to the dataset.
        # Slow path: decode, transform and stack every image of the segment.
        image_sequence = []
        with ThreadingTimeout(3600.0) as timeout_ctx2:
            for img_as_img in segment.get_images():
                img_as_tensor = self.transformer(img_as_img)
                if self.minus_point_5:
                    img_as_tensor = img_as_tensor - 0.5  # from [0, 1] -> [-0.5, 0.5]
                img_as_tensor = self.normalizer(img_as_tensor)
                img_as_tensor = img_as_tensor.unsqueeze(0)
                image_sequence.append(img_as_tensor)
            images = torch.cat(image_sequence, 0)
        if not bool(timeout_ctx2):
            CometLogger.fatalprint('Encountered fatal delay when reading the uncached images from the dataset')
        # Best-effort caching: -1 marks "disk usage unknown, skip caching".
        free = -1
        try:
            with ThreadingTimeout(2.0) as timeout_ctx3:
                _, _, free = shutil.disk_usage(cache_directory)
            if not bool(timeout_ctx3):
                CometLogger.print('Took too long to measure disk space. Skipping caching.')
        except Exception as e:
            print("Warning: unable to cache the segment's image tensor, there was an error while getting "
                  "disk usage: {}".format(e), file=sys.stderr)
        if free == -1:
            pass  # disk usage unknown — do not attempt to cache
        elif free // (2**30) > 1:  # only cache when more than ~1 GiB is free
            try:
                with ThreadingTimeout(5.0) as timeout_ctx4:
                    torch.save(images, "{}/{}.pkl".format(cache_directory, segment.__hash__()))
                if not bool(timeout_ctx4):
                    CometLogger.print('Took too long when saving to cache folder. Deadlock possible. Skipping caching.')
            except Exception as e:
                print("Warning: unable to cache the segment's image tensor: {}".format(e), file=sys.stderr)
        else:
            pass  # not enough free space — skip caching
    if self.augment_dataset:
        images = self._augment_image_sequence(images)
    return images
def launch_experiment(experiments: list, repo: Repo):
    """Launch the given Comet experiments on this machine.

    With multiple GPUs and a matching number of experiments, one process is
    spawned per experiment; a single experiment runs in-process.

    :param experiments: Comet experiment objects to run.
    :param repo: git repository handle, forwarded to the workers by path.
    :raises NotImplementedError: when the experiment/GPU combination is unsupported.
    """
    param = Parameters()
    # The datasets needs to be segmented before any experiments is launched to prevent process conflicts
    if param.segment_dataset:
        Loaders.segment_datasets(param)
    world_size = torch.cuda.device_count()
    if cuda_is_available() and world_size > 1 and 1 < len(experiments) <= world_size:
        # Fix: report the number of experiments actually launched — there may
        # be fewer experiments than GPUs, so device_count() was wrong here.
        print("-> Launching {} parallel experiments...".format(len(experiments)))
        experiment_keys = [experiment.get_key() for experiment in experiments]
        print("-> experiment keys: {}".format(experiment_keys))
        experiment_params = [experiment.params for experiment in experiments]
        api_key = experiments[0].api_key
        print("-> spawning the experiments' processes")
        multiprocessing.spawn(launch_parallel_experiment,
                              nprocs=len(experiments),
                              args=(api_key, experiment_keys, experiment_params, repo.git_dir))
    elif len(experiments) == 1:
        with CometLogger(experiments[0]):
            print("-> launching single experiment")
            launch_single_GPU_experiment(experiments[0], repo, param)
    else:
        raise NotImplementedError()
def _load_kitti(par: Parameters) -> tuple:
    """Build the KITTI train/validation datasets and dataloaders.

    :param par: experiment parameters; ``dataset_suffix`` selects the concrete
        ``KITTI<suffix>`` dataset class.
    :return: (train_dataset, train_dl, valid_dataset, valid_dl).
    :raises NotImplementedError: when the requested dataset class does not exist.
    """
    # Load the dataset by string from the parameters
    try:
        dataset_class = getattr(KITTI, "KITTI" + par.dataset_suffix)
    except AttributeError as error:
        # Fix: the original built the NotImplementedError but never raised it
        # (silently continuing with dataset_class unbound) and never filled the
        # message placeholder.
        raise NotImplementedError(
            "Dataset class {} does not exist. Please check the dataset name and dataset_suffix".format(
                "KITTI" + par.dataset_suffix)) from error
    CometLogger.print("Using specific dataset: {}".format("KITTI" + par.dataset_suffix))
    train_dataset = dataset_class(
        par.kitti_training_segments,
        new_size=(par.img_w, par.img_h),
        img_mean=par.kitti_mean,
        img_std=par.kitti_std,
        resize_mode=par.resize_mode,
        minus_point_5=par.minus_point_5,
        augment_dataset=par.training_dataset_augmentation)
    train_sampler = SortedRandomBatchSegmentSampler(
        dataset=train_dataset,
        batch_size=par.batch_size,
        drop_last=par.drop_last_extra_segment)
    train_dl = DataLoader(train_dataset,
                          batch_sampler=train_sampler,
                          num_workers=par.n_processors,
                          pin_memory=par.pin_mem)
    valid_dataset = dataset_class(par.kitti_validation_segments,
                                  new_size=(par.img_w, par.img_h),
                                  img_mean=par.kitti_mean,
                                  img_std=par.kitti_std,
                                  resize_mode=par.resize_mode,
                                  minus_point_5=par.minus_point_5)
    valid_sampler = SortedRandomBatchSegmentSampler(
        dataset=valid_dataset,
        batch_size=par.batch_size,
        drop_last=par.drop_last_extra_segment)
    valid_dl = DataLoader(valid_dataset,
                          batch_sampler=valid_sampler,
                          num_workers=par.n_processors,
                          pin_memory=par.pin_mem)
    return train_dataset, train_dl, valid_dataset, valid_dl
def _map_pretrained_model_to_current_model(pretrained_model_path: str, model: nn.Module):
    """Load a pretrained checkpoint into ``model``, tolerating partial matches.

    First tries a strict ``load_state_dict``; when the architectures differ,
    falls back to copying only the keys the two state dicts have in common
    (the checkpoint is then expected to nest its weights under 'state_dict').

    :param pretrained_model_path: path to the checkpoint file.
    :param model: model whose parameters are overwritten in place.
    """
    CometLogger.print(
        "Loading pretrain model: {}".format(pretrained_model_path))
    pretrained_model = torch.load(pretrained_model_path, map_location='cpu')
    try:
        model.load_state_dict(pretrained_model)
    except Exception:
        # Fix: narrowed the bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate. The fallback itself is unchanged.
        model_dict = model.state_dict()
        # Will map values of common keys only
        common_updated_dict = {
            k: v
            for k, v in pretrained_model['state_dict'].items()
            if k in model_dict
        }
        model_dict.update(common_updated_dict)
        model.load_state_dict(model_dict)
def load_experiment_assets(param):
    """Assemble everything an experiment needs to run.

    Loads, in order: the train/validation dataloaders, the model, the
    optimizer (bound to that model), and the loss.

    :param param: experiment parameters driving every loader.
    :return: (loss, model, optimizer, train_dataloader, valid_dataloader).
    """
    CometLogger.print("~~ Loading dataset's dataloaders ~~")
    train_dataloader, valid_dataloader = load_dataset_dataloaders(param)

    CometLogger.print("~~ Loading the model ~~")
    model = load_model(param)

    CometLogger.print("~~ Loading the optimizer ~~")
    optimizer = load_optimizer(param, model)

    CometLogger.print("~~ Loading the loss ~~")
    loss = load_loss(param)

    return loss, model, optimizer, train_dataloader, valid_dataloader
def _load_midAir_dataset(param: Parameters) -> tuple:
    """Build the MidAir train/validation datasets and dataloaders.

    :param param: experiment parameters; ``dataset_suffix`` selects the
        concrete ``MidAir<suffix>`` dataset class.
    :return: (train_dataset, train_dataloader, valid_dataset, valid_dataloader).
    :raises NotImplementedError: when the requested dataset class does not exist.
    """
    # Load the dataset by string from the parameters
    try:
        dataset_class = getattr(MidAir, "MidAir" + param.dataset_suffix)
    except AttributeError as error:
        # Fix: the original built the NotImplementedError but never raised it
        # (silently continuing with dataset_class unbound) and never filled the
        # message placeholder.
        raise NotImplementedError(
            "Dataset class {} does not exist. Please check the dataset name and dataset_suffix".format(
                "MidAir" + param.dataset_suffix)) from error
    CometLogger.print(
        "Using specific dataset: {}".format("MidAir" + param.dataset_suffix))
    train_dataset = dataset_class(
        param.midair_training_path,
        new_size=(param.img_w, param.img_h),
        img_mean=param.midair_mean,
        img_std=param.midair_std,
        resize_mode=param.resize_mode,
        minus_point_5=param.minus_point_5,
        augment_dataset=param.training_dataset_augmentation)
    train_random_sampler = SortedRandomBatchSegmentSampler(
        dataset=train_dataset,
        batch_size=param.batch_size,
        drop_last=param.drop_last_extra_segment)
    valid_dataset = dataset_class(param.midair_validation_path,
                                  new_size=(param.img_w, param.img_h),
                                  img_mean=param.midair_mean,
                                  img_std=param.midair_std,
                                  resize_mode=param.resize_mode,
                                  minus_point_5=param.minus_point_5)
    valid_random_sampler = SortedRandomBatchSegmentSampler(
        dataset=valid_dataset,
        batch_size=param.batch_size,
        drop_last=param.drop_last_extra_segment)
    train_dataloader = DataLoader(train_dataset,
                                  num_workers=param.n_processors,
                                  pin_memory=param.pin_mem,
                                  batch_sampler=train_random_sampler)
    valid_dataloader = DataLoader(valid_dataset,
                                  num_workers=param.n_processors,
                                  pin_memory=param.pin_mem,
                                  batch_sampler=valid_random_sampler)
    return train_dataset, train_dataloader, valid_dataset, valid_dataloader
def _log_quaternion_poses(self, poses, poses_gt, dataset_name: str, trajectory_name: str):
    """
    Logs the pose in text format where the angle is a quaternion:
            timestamp tx ty tz qx qy qz qw

    :param poses: predicted poses, columns 0-2 rotation (Tait-Bryan), 3-5 translation.
    :param poses_gt: ground-truth poses in the same layout.
    :param dataset_name: dataset the trajectory belongs to.
    :param trajectory_name: trajectory identifier.
    """
    pose_lines = []
    pose_gt_lines = []
    for i, pose in enumerate(poses):
        # att.elements[[1, 2, 3, 0]] reorganizes quaternion elements
        # from scalar first w-x-y-z to scalar last x-y-z-w
        rotation_quat = Geometry.tait_bryan_rotation_to_quaternion(
            pose[:3]).elements[[1, 2, 3, 0]]
        rotation_quat_gt = Geometry.tait_bryan_rotation_to_quaternion(
            poses_gt[i][:3]).elements[[1, 2, 3, 0]]
        pose_lines.append(f"{i} {pose[3]} {pose[4]} {pose[5]} "
                          f"{rotation_quat[0]} {rotation_quat[1]} {rotation_quat[2]} {rotation_quat[3]}")
        pose_gt_lines.append(f"{i} {poses_gt[i][3]} {poses_gt[i][4]} {poses_gt[i][5]} "
                             f"{rotation_quat_gt[0]} {rotation_quat_gt[1]} {rotation_quat_gt[2]} "
                             f"{rotation_quat_gt[3]}")
    # Fix: assemble with str.join instead of repeated string concatenation,
    # which was quadratic in the number of poses.
    pose_output = "\n".join(pose_lines)
    pose_gt_output = "\n".join(pose_gt_lines)

    metadata = dict()
    metadata["title"] = "pose_output_quaternion"
    metadata["dataset"] = dataset_name
    metadata["trajectory"] = trajectory_name
    metadata["model"] = self.model_name
    filename = f'{metadata["title"]}_{dataset_name}_{trajectory_name}_{metadata["model"]}.txt'
    CometLogger.get_experiment().log_asset_data(pose_output, name=filename, metadata=metadata)

    metadata["title"] = "pose_gt_output_quaternion"
    filename = f'{metadata["title"]}_{dataset_name}_{trajectory_name}_{metadata["model"]}.txt'
    CometLogger.get_experiment().log_asset_data(pose_gt_output, name=filename, metadata=metadata)
def __init__(self,
             model: nn.Module,
             train_dataloader: DataLoader,
             valid_dataloader,
             optimizer: Optimizer,
             loss: AbstractLoss,
             early_stopping_patience=7,
             model_backup_destination="./",
             resume=False,
             gradient_clipping_value=None):
    """Wire the trainer together: model, data, optimizer, losses, early stopping.

    :param model: network to train.
    :param train_dataloader: training batches.
    :param valid_dataloader: validation batches.
    :param optimizer: optimizer bound to ``model``'s parameters.
    :param loss: custom loss used for backpropagation.
    :param early_stopping_patience: epochs without improvement before stopping.
    :param model_backup_destination: base path for checkpoint files.
    :param resume: when True, restore model/optimizer state from the last checkpoint.
    :param gradient_clipping_value: optional clipping threshold for gradients.
    """
    self.model: nn.Module = model
    self.train_dataloader: DataLoader = train_dataloader
    self.valid_dataloader: DataLoader = valid_dataloader
    self.optimizer: Optimizer = optimizer
    # Loss used for benchmarking agaisnt other runs only in case the loss function from which backprop is computed changes
    self.benchmark_MSE_loss: AbstractLoss = BatchSegmentMSELoss()
    # Custom loss is used for backpropagating
    self.custom_loss: AbstractLoss = loss
    self.gradient_clipping_value = gradient_clipping_value
    self.model_backup_destination = self._get_backup_destination(
        model_backup_destination, model, train_dataloader, optimizer, loss)
    self.early_stopper = EarlyStopping(
        patience=early_stopping_patience,
        verbose=True,
        destination_path=self.model_backup_destination)
    if not resume:
        return
    # Resume path: overwrite the freshly-built model/optimizer state with the checkpoint.
    CometLogger.print("Resuming the training of {}".format(
        self.model_backup_destination))
    CometLogger.print(
        "Overriding the Model and Optimizer's state dictionaries with the checkpoint's dicts"
    )
    self.model.load_state_dict(self.early_stopper.load_model_checkpoint())
    self.optimizer.load_state_dict(self.early_stopper.load_optimizer_checkpoint())
def launch_parallel_experiment(gpu_rank, api_key, experiment_keys, experiment_params, repo_path):
    """Per-GPU worker: rebuild the Comet experiment and run training/testing.

    :param gpu_rank: CUDA device index this worker owns (also used as stagger delay).
    :param api_key: Comet API key shared by all experiments.
    :param experiment_keys: one existing-experiment key per worker, indexed by rank.
    :param experiment_params: one parameter set per worker, indexed by rank.
    :param repo_path: path to the git repository (re-opened locally).
    """
    torch.cuda.set_device(gpu_rank)

    # Segmentation already happened in the parent process; give each worker
    # its own backup subdirectory so checkpoints don't collide.
    param = Parameters()
    param.segment_dataset = False
    param.model_backup_destination = "{}/process_{}".format(param.model_backup_destination, gpu_rank)

    experiment = ExistingExperiment(api_key=api_key,
                                    previous_experiment=experiment_keys[gpu_rank],
                                    log_env_details=True,
                                    log_env_gpu=True,
                                    log_env_cpu=True)
    experiment.params = experiment_params[gpu_rank]
    repo = Repo(repo_path)

    with CometLogger(experiment, gpu_id=gpu_rank, print_to_comet_only=True):
        setup_comet_experiment(experiment, param, repo)

        CometLogger.print("-> loading experiments assets:")
        loss, model, optimizer, train_dataloader, valid_dataloader = load_experiment_assets(param)

        if param.train:
            CometLogger.print("~~ Launching the training ~~")
            # Stagger worker start-up by rank to reduce deadlock likelihood.
            CometLogger.print("Sleeping {} secs to reduce chances of deadlock.".format(gpu_rank))
            sleep(gpu_rank)
            launch_training(model, train_dataloader, valid_dataloader, optimizer, loss, param)

        if param.test:
            CometLogger.print("~~ Testing the model ~~")
            launch_testing(model, param)

        # Release references and GPU memory before the worker exits.
        del train_dataloader, valid_dataloader, model, optimizer, loss
        torch.cuda.empty_cache()
def run(self, epochs_number: int) -> nn.Module:
    """Train and validate for up to ``epochs_number`` epochs with early stopping.

    After the loop (completed or stopped early), the best early-stopping
    checkpoint is loaded back into the model.

    :param epochs_number: maximum number of epochs to run.
    :return: the model with the best checkpointed weights loaded.
    """
    for epoch in self._epochs(epochs_number):
        CometLogger.print("=========== Epoch {} ===========".format(epoch))

        started_at = time.time()
        custom_train_loss, train_benchmark_loss = self._train()
        custom_valid_loss, valid_benchmark_loss = self._validate()
        elapsed = time.time() - started_at

        self._log_epoch(custom_train_loss, custom_valid_loss, epoch, elapsed,
                        train_benchmark_loss, valid_benchmark_loss)

        # Early stopping tracks the custom validation loss and checkpoints on improvement.
        self.early_stopper(custom_valid_loss, self.model, self.optimizer)
        if self.early_stopper.early_stop:
            CometLogger.get_experiment().log_metric("Early stop epoch", epoch + 1)
            CometLogger.print("Early stopping")
            break

    CometLogger.print(
        "Training complete, loading the last early stopping checkpoint to memory..."
    )
    self.model.load_state_dict(self.early_stopper.load_model_checkpoint())
    return self.model
def load_dataset_dataloaders(param: Parameters) -> tuple:
    """Return the train/validation dataloaders for the configured dataset source.

    :param param: experiment parameters; ``dataset`` selects KITTI, MidAir or all.
    :return: (train_dataloader, valid_dataloader).
    :raises NotImplementedError: for an unknown dataset source.
    """
    if param.dataset == "KITTI":
        CometLogger.print("Using dataset source: KITTI")
        loader = _load_kitti
    elif param.dataset == "MidAir":
        CometLogger.print("Using dataset source: MidAir")
        loader = _load_midAir_dataset
    elif param.dataset == "all":
        CometLogger.print("Using dataset source: All")
        loader = _load_all_datasets
    else:
        raise NotImplementedError()
    # Fix: the loaders also return the datasets, which were bound to unused
    # locals; discard them explicitly since only the dataloaders are needed.
    _, train_dataloader, _, valid_dataloader = loader(param)
    return train_dataloader, valid_dataloader
def __getitem__(self, item: int):
    """Return ``(image_sequence, pose)`` for segment ``item``.

    Both the image-sequence fetch and the pose lookup run under hour-long
    watchdogs; exceeding either is treated as fatal.

    :param item: dataset index of the segment.
    :return: (image_sequence, pose) tuple.
    """
    with ThreadingTimeout(3600.0) as images_watchdog:
        try:
            segment, image_sequence = super().__getitem__(item)
        except Exception as e:
            # Log before re-raising so the failure is visible in Comet.
            CometLogger.print(str(e))
            raise e
    if not bool(images_watchdog):
        CometLogger.fatalprint(
            'Encountered fatal delay while getting the image sequence')

    with ThreadingTimeout(3600.0) as pose_watchdog:
        pose = self._get_segment_pose(segment)
    if not bool(pose_watchdog):
        CometLogger.fatalprint(
            'Encountered fatal delay while getting the pose of the sequence'
        )

    return image_sequence, pose
def _log_epoch(self, custom_train_loss, custom_valid_loss, epoch, epoch_run_time,
               train_benchmark_loss, valid_benchmark_loss):
    """Push all per-epoch metrics to Comet and echo a console summary.

    :param custom_train_loss: mean custom loss over the training set.
    :param custom_valid_loss: mean custom loss over the validation set.
    :param epoch: epoch index being logged.
    :param epoch_run_time: wall-clock duration of the epoch in seconds.
    :param train_benchmark_loss: mean benchmark MSE loss over training.
    :param valid_benchmark_loss: mean benchmark MSE loss over validation.
    """
    CometLogger.print("Epoch run time: {}".format(epoch_run_time))
    experiment = CometLogger.get_experiment()

    # Insertion order matters only for log readability; values are keyed by name.
    epoch_metrics = {
        "epoch run time": epoch_run_time,
        "mean training loss": train_benchmark_loss,
        "mean validation loss": valid_benchmark_loss,
        "custom mean training loss": custom_train_loss,
        "custom mean validation loss": custom_valid_loss,
    }
    for metric_name, value in epoch_metrics.items():
        experiment.log_metric(metric_name, value, epoch=epoch)

    CometLogger.print("Mean train loss: {}, Valid train loss: {}".format(
        custom_train_loss, custom_valid_loss))
    experiment.log_metric("epoch", epoch)
    experiment.log_epoch_end(epoch_cnt=epoch)
def _train(self) -> tuple:
    """Run one full training epoch over ``self.train_dataloader``.

    Each hazardous step (GPU transfer, loss computation, loss accumulation)
    is wrapped in a 4-hour ThreadingTimeout watchdog; exceeding one is fatal.

    :return: (mean custom loss, mean benchmark MSE loss) over all batches.
    """
    timer_start_time = time.time()
    self.model.train()
    losses_sum = 0
    benchmark_losses_sum = 0
    for i, (input, target) in enumerate(self.train_dataloader):
        CometLogger.get_experiment().log_metric("Current batch", i + 1)
        CometLogger.get_experiment().log_metric("Total nbr of batches",
                                                len(self.train_dataloader))
        # Only log this if we are NOT in a multiprocessing session
        if CometLogger.gpu_id is None:
            print("--> processing batch {}/{} of size {}".format(
                i + 1, len(self.train_dataloader), len(input)))
        if cuda_is_available():
            with ThreadingTimeout(14400.0) as timeout_ctx1:
                input = input.cuda(
                    non_blocking=self.train_dataloader.pin_memory)
                target = target.cuda(
                    non_blocking=self.train_dataloader.pin_memory)
            if not bool(timeout_ctx1):
                CometLogger.fatalprint(
                    'Encountered fatally long delay when moving tensors to GPUs'
                )
        prediction = self.model.forward(input)
        with ThreadingTimeout(14400.0) as timeout_ctx3:
            # Some models return a tuple; the benchmark loss is always
            # computed on the first element (the pose prediction).
            if type(prediction) is tuple:
                benchmark_loss = self.benchmark_MSE_loss.compute(
                    prediction[0], target)
            else:
                benchmark_loss = self.benchmark_MSE_loss.compute(
                    prediction, target)
        if not bool(timeout_ctx3):
            CometLogger.fatalprint(
                'Encountered fatally long delay during computation of benchmark loss'
            )
        with ThreadingTimeout(14400.0) as timeout_ctx4:
            benchmark_losses_sum += float(
                benchmark_loss.data.cpu().numpy())
        if not bool(timeout_ctx4):
            CometLogger.fatalprint(
                'Encountered fatally long delay during summation of benchmark losses'
            )
        # NOTE(review): this watchdog reuses the name timeout_ctx4 (shadowing
        # the previous one) — harmless since the prior check already ran, but
        # presumably intended to be timeout_ctx5.
        with ThreadingTimeout(14400.0) as timeout_ctx4:
            loss = self.custom_loss.compute(prediction, target)
        if not bool(timeout_ctx4):
            CometLogger.fatalprint(
                'Encountered fatally long delay during computation of the custom loss'
            )
        # Backprop uses the custom loss only; the benchmark loss is for comparison.
        self._backpropagate(loss)
        with ThreadingTimeout(14400.0) as timeout_ctx6:
            losses_sum += float(loss.data.cpu().numpy())
        if not bool(timeout_ctx6):
            CometLogger.fatalprint(
                'Encountered fatally long delay during loss addition')
    timer_end_time = time.time()
    CometLogger.get_experiment().log_metric(
        "Epoch training time", timer_end_time - timer_start_time)
    return losses_sum / len(
        self.train_dataloader), benchmark_losses_sum / len(
            self.train_dataloader)
def load_model(param: Parameters) -> nn.Module:
    """Instantiate the model named by ``param.model`` and move it to the GPU if available.

    :param param: experiment parameters; ``model`` selects the architecture,
        ``img_h``/``img_w``/``rnn_hidden_size`` configure it, and
        ``pretrained_model`` points at an optional checkpoint to map in.
    :return: the constructed (and possibly CUDA-resident) model.
    :raises NotImplementedError: for an unknown model name.
    """
    # Dispatch table: every one of these constructors shares the
    # (img_h, img_w, rnn_hidden_size=...) signature. SnailVO is the one
    # exception and is handled separately below.
    rnn_models = {
        "DeepVO": DeepVO,
        "CoordConvDeepVO": CoordConvDeepVO,
        "MagicVO": MagicVO,
        "SelfAttentionVO": SelfAttentionVO,
        "SplitSelfAttentionVO": SplitSelfAttentionVO,
        "CoordConvSelfAttentionVO": CoordConvSelfAttentionVO,
        "SimpleSelfAttentionVO": SimpleSelfAttentionVO,
        "PositionalSimpleSelfAttentionVO": PositionalSimpleSelfAttentionVO,
        "SkippedSelfAttention": SkippedSelfAttention,
        "WeightedSelfAttentionVO": WeightedSelfAttentionVO,
        "SelfAttentionVO_GlobRelOutput": SelfAttentionVO_GlobRelOutput,
        "StackedSelfAttentionVO": StackedSelfAttentionVO,
        "NoSelfAttentionVO": NoSelfAttentionVO,
        "SnailSelfAttentionVO": SnailSelfAttentionVO,
        "GlobalRelativeSelfAttentionVO": GlobalRelativeSelfAttentionVO,
        "GlobalRelativeTransformerVO": GlobalRelativeTransformerVO,
        "GlobalRelativeTransformerVO_globXAsKeyVal": GlobalRelativeTransformerVO_globXAsKeyVal,
        "GlobalRelativeSelfAttentionVO_globXasKeyVal": GlobalRelativeSelfAttentionVO_globXasKeyVal,
    }
    if param.model == "SnailVO":
        # Fix: the original branch logged "Using SnailSelfAttentionVO" for SnailVO.
        CometLogger.print("Using SnailVO")
        model = SnailVO(param.img_h, param.img_w, 5)
    elif param.model in rnn_models:
        CometLogger.print("Using {}".format(param.model))
        model = rnn_models[param.model](param.img_h,
                                        param.img_w,
                                        rnn_hidden_size=param.rnn_hidden_size)
    else:
        CometLogger.print("{} was not implemented".format(param.model))
        raise NotImplementedError()
    _map_pretrained_model_to_current_model(param.pretrained_model, model)
    if cuda_is_available():
        CometLogger.print("Training with CUDA")
        model.cuda()
    else:
        CometLogger.print("CUDA not available. Training on the CPU.")
    return model
def run(self):
    """Evaluate the model on every test trajectory and log all metrics and plots.

    For each (dataset, trajectory) dataloader: runs inference, trims the
    trajectories to the valid ground-truth range, logs figures, drift/ATE/RE
    metrics and pose dumps, then logs the compound metrics and total average
    losses over all trajectories.
    """
    trajectory_rotation_losses = []
    trajectory_translation_losses = []
    drift_errors = []
    ATEs = []
    REs = []
    for dataset_name, trajectory_name, dataloader in self.trajectory_dataloaders:
        dataset: AbstractSegmentDataset = dataloader.dataset
        print("testing {}, {}".format(trajectory_name, dataset_name))
        start = time.time()
        predictions, rotation_losses, translation_losses, absolute_ground_truth = self._test(
            dataloader)
        end = time.time()
        # Trim prediction and ground truth to the last usable index, derived
        # from the translation columns (3:) of the ground truth.
        last_included_index = self._trim_trajectories(
            absolute_ground_truth[:, 3:])
        predictions = predictions[:last_included_index + 1]
        absolute_ground_truth = absolute_ground_truth[:last_included_index + 1]
        CometLogger.print(
            f"Inferred {len(predictions)} poses in {end-start} seconds.\n"
            f"Dataset fps: {dataset.framerate}, inference fps {len(predictions)/(end-start)}."
        )
        trajectory_rotation_losses.append(
            (dataset_name, trajectory_name, rotation_losses))
        trajectory_translation_losses.append(
            (dataset_name, trajectory_name, translation_losses))
        # Per-trajectory rotation/translation figures.
        plotter = TrajectoryPlotter(trajectory_name, dataset_name,
                                    self.model_name, absolute_ground_truth,
                                    predictions)
        CometLogger.get_experiment().log_figure(
            figure=plotter.rotation_figure,
            figure_name='rotation {} {}'.format(trajectory_name, dataset_name))
        CometLogger.get_experiment().log_figure(
            figure=plotter.position_figure,
            figure_name='translation {} {}'.format(trajectory_name, dataset_name))
        # Per-trajectory drift/ATE/RE metrics, accumulated for the compound log.
        drift, ATE, RE = self._log_metrics(absolute_ground_truth, dataset,
                                           dataset_name, predictions,
                                           trajectory_name)
        drift_errors.append(drift)
        ATEs.append(ATE)
        REs.append(RE)
        self._log_matrix_poses(predictions, absolute_ground_truth,
                               dataset_name, trajectory_name)
        self._log_quaternion_poses(predictions, absolute_ground_truth,
                                   dataset_name, trajectory_name)
    self._log_compounded_metrics(ATEs, REs, drift_errors)
    losses_figure = self._plot_trajectory_losses(
        trajectory_rotation_losses, trajectory_translation_losses)
    CometLogger.get_experiment().log_figure(
        figure=losses_figure, figure_name="trajectory_losses")
    # compute total avg losses
    # NOTE(review): "_complute_" looks like a typo for "_compute_" in the
    # helper's name — defined elsewhere, so not renamed here.
    translation_loss = self._complute_total_avg_loss(
        trajectory_translation_losses)
    rotation_loss = self._complute_total_avg_loss(
        trajectory_rotation_losses)
    CometLogger.get_experiment().log_metric(
        "Total Avg Translation loss (test phase)", translation_loss)
    CometLogger.get_experiment().log_metric(
        "Total Avg Rotation loss (test phase)", rotation_loss)
def load_optimizer(param: Parameters, model: nn.Module) -> Optimizer:
    """Build the optimizer named by ``param.optimizer`` for ``model``'s parameters.

    :param param: experiment parameters; ``optimizer`` and ``learning_rate`` apply.
    :param model: model whose parameters the optimizer will update.
    :return: the configured optimizer.
    :raises NotImplementedError: for an unknown optimizer name.
    """
    CometLogger.get_experiment().log_parameter("Optimizer", param.optimizer)
    CometLogger.get_experiment().log_parameter("Learning rate",
                                               param.learning_rate)
    # Fix: the original compared with `is`, which tests object identity —
    # string interning made it work only by accident. Use `==` for equality.
    if param.optimizer == "Adagrad":
        CometLogger.print("Using Adagrad")
        return optim.Adagrad(model.parameters(), lr=param.learning_rate)
    elif param.optimizer == "Adam":
        CometLogger.print("Using Adam Optimizer")
        return optim.Adam(model.parameters(), lr=param.learning_rate)
    elif param.optimizer == "RMSProp":
        CometLogger.print("Using RMSProp Optimizer")
        return optim.RMSprop(model.parameters(), lr=param.learning_rate)
    else:
        CometLogger.print("Optimizer {} was not implemented".format(
            param.optimizer))
        raise NotImplementedError()