Example #1
def load_loss(param: Parameters) -> AbstractLoss:
    # Load the loss by string from the parameters
    loss_class = getattr(Losses, param.loss_function)
    # NOTE: co_varnames lists the constructor's parameters, plus 'self' and
    # any locals declared inside __init__.
    initialization_arguments = loss_class.__init__.__code__.co_varnames
    params_dict = param.get_params_as_dict()
    arguments = {
        k: params_dict[k]
        for k in initialization_arguments
        if k in params_dict
    }

    CometLogger.print("Using loss: {}".format(param.loss_function))
    return loss_class(**arguments)
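A minimal, self-contained sketch of the same introspection pattern (the HuberLoss class and the parameter dict are hypothetical stand-ins); inspect.signature is used here instead of __code__.co_varnames because it enumerates only real constructor parameters, whereas co_varnames also includes 'self' and locals:

import inspect

class HuberLoss:  # hypothetical loss class
    def __init__(self, delta=1.0):
        self.delta = delta

params = {"loss_function": "HuberLoss", "delta": 0.5, "unrelated": 42}

# Enumerate the constructor's parameters, excluding 'self'
accepted = set(inspect.signature(HuberLoss.__init__).parameters) - {"self"}
kwargs = {k: v for k, v in params.items() if k in accepted}
loss = HuberLoss(**kwargs)
assert loss.delta == 0.5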
Example #2
    def handle_timeout(self, signum, frame):
        self.state = BaseTimeout.TIMED_OUT
        self.traceback = traceback.format_stack(frame)
        d = {'_frame': frame}  # Allow access to frame object.
        d.update(frame.f_globals)  # Unless shadowed by global
        d.update(frame.f_locals)

        message = "Timeout Signal received.\nTraceback:\n"
        message += ''.join(self.traceback)

        CometLogger.print(message)

        exception_message = 'Block exceeded maximum timeout value (%d seconds).\nTraceback:\n' % self.seconds
        exception_message += ''.join(self.traceback)
        raise TimeoutException(exception_message)
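For context, a handler like this is normally installed with the standard signal module; a minimal Unix-only sketch (the two-second timeout and the busy loop are purely illustrative):

import signal
import traceback

def handle_timeout(signum, frame):
    raise TimeoutError("Timeout Signal received.\n" + "".join(traceback.format_stack(frame)))

signal.signal(signal.SIGALRM, handle_timeout)  # install the handler
signal.alarm(2)  # deliver SIGALRM after 2 seconds
try:
    while True:  # simulate a block that exceeds the timeout
        pass
except TimeoutError as exc:
    print(exc)
finally:
    signal.alarm(0)  # cancel any pending alarm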
Example #3
def _load_kitti(par: Parameters) -> tuple:

    # Load the dataset by string from the parameters
    try:
        dataset_class = getattr(KITTI, "KITTI" + par.dataset_suffix)
    except AttributeError:
        raise NotImplementedError(
            "Dataset class {} does not exist. Please check the dataset name and dataset_suffix".format(
                "KITTI" + par.dataset_suffix))
    CometLogger.print("Using specific dataset: {}".format("KITTI" +
                                                          par.dataset_suffix))

    train_dataset = dataset_class(
        par.kitti_training_segments,
        new_size=(par.img_w, par.img_h),
        img_mean=par.kitti_mean,
        img_std=par.kitti_std,
        resize_mode=par.resize_mode,
        minus_point_5=par.minus_point_5,
        augment_dataset=par.training_dataset_augmentation)

    train_sampler = SortedRandomBatchSegmentSampler(
        dataset=train_dataset,
        batch_size=par.batch_size,
        drop_last=par.drop_last_extra_segment)
    train_dl = DataLoader(train_dataset,
                          batch_sampler=train_sampler,
                          num_workers=par.n_processors,
                          pin_memory=par.pin_mem)

    valid_dataset = dataset_class(par.kitti_validation_segments,
                                  new_size=(par.img_w, par.img_h),
                                  img_mean=par.kitti_mean,
                                  img_std=par.kitti_std,
                                  resize_mode=par.resize_mode,
                                  minus_point_5=par.minus_point_5)

    valid_sampler = SortedRandomBatchSegmentSampler(
        dataset=valid_dataset,
        batch_size=par.batch_size,
        drop_last=par.drop_last_extra_segment)
    valid_dl = DataLoader(valid_dataset,
                          batch_sampler=valid_sampler,
                          num_workers=par.n_processors,
                          pin_memory=par.pin_mem)

    return train_dataset, train_dl, valid_dataset, valid_dl
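SortedRandomBatchSegmentSampler is project-specific, but the general contract is that DataLoader's batch_sampler argument accepts any iterable of index lists and is mutually exclusive with batch_size, shuffle, sampler, and drop_last. A toy illustration with stand-in data:

import torch
from torch.utils.data import DataLoader, TensorDataset

dataset = TensorDataset(torch.arange(10, dtype=torch.float32))
batches = [[0, 1, 2], [3, 4, 5], [6, 7, 8, 9]]  # explicit index lists
loader = DataLoader(dataset, batch_sampler=batches)

for (batch,) in loader:
    print(batch.shape)  # torch.Size([3]), torch.Size([3]), torch.Size([4])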
Example #4
def _map_pretrained_model_to_current_model(pretrained_model_path: str,
                                           model: nn.Module):
    CometLogger.print(
        "Loading pretrain model: {}".format(pretrained_model_path))
    pretrained_model = torch.load(pretrained_model_path, map_location='cpu')
    try:
        model.load_state_dict(pretrained_model)
    except Exception:
        model_dict = model.state_dict()
        # Will map values of common keys only
        common_updated_dict = {
            k: v
            for k, v in pretrained_model['state_dict'].items()
            if k in model_dict
        }
        model_dict.update(common_updated_dict)
        model.load_state_dict(model_dict)
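The fallback branch keeps only the checkpoint entries whose keys also exist in the current model. A minimal sketch of that partial-loading pattern on toy modules (the shape check is an extra safeguard, not part of the function above):

import torch.nn as nn

old = nn.Sequential(nn.Linear(4, 8), nn.Linear(8, 2))
new = nn.Sequential(nn.Linear(4, 8), nn.Linear(8, 3))  # last layer differs

checkpoint = old.state_dict()
model_dict = new.state_dict()
# Keep only entries whose key and shape both match the current model
common = {k: v for k, v in checkpoint.items()
          if k in model_dict and v.shape == model_dict[k].shape}
model_dict.update(common)
new.load_state_dict(model_dict)
print(sorted(common))  # ['0.bias', '0.weight']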
Example #5
def load_experiment_assets(param):
    CometLogger.print("~~ Loading dataset's dataloaders ~~")
    train_dataloader, valid_dataloader = load_dataset_dataloaders(param)
    CometLogger.print("~~ Loading the model ~~")
    model = load_model(param)
    CometLogger.print("~~ Loading the optimizer ~~")
    optimizer = load_optimizer(param, model)
    CometLogger.print("~~ Loading the loss ~~")
    loss = load_loss(param)
    return loss, model, optimizer, train_dataloader, valid_dataloader
Example #6
def _load_midAir_dataset(param: Parameters) -> tuple:

    # Load the dataset by string from the parameters
    try:
        dataset_class = getattr(MidAir, "MidAir" + param.dataset_suffix)
    except AttributeError:
        raise NotImplementedError(
            "Dataset class {} does not exist. Please check the dataset name and dataset_suffix".format(
                "MidAir" + param.dataset_suffix))

    CometLogger.print(
        "Using specific dataset: {}".format("MidAir" + param.dataset_suffix))

    train_dataset = dataset_class(
        param.midair_training_path,
        new_size=(param.img_w, param.img_h),
        img_mean=param.midair_mean,
        img_std=param.midair_std,
        resize_mode=param.resize_mode,
        minus_point_5=param.minus_point_5,
        augment_dataset=param.training_dataset_augmentation)
    train_random_sampler = SortedRandomBatchSegmentSampler(
        dataset=train_dataset,
        batch_size=param.batch_size,
        drop_last=param.drop_last_extra_segment)
    valid_dataset = dataset_class(param.midair_validation_path,
                                  new_size=(param.img_w, param.img_h),
                                  img_mean=param.midair_mean,
                                  img_std=param.midair_std,
                                  resize_mode=param.resize_mode,
                                  minus_point_5=param.minus_point_5)
    valid_random_sampler = SortedRandomBatchSegmentSampler(
        dataset=valid_dataset,
        batch_size=param.batch_size,
        drop_last=param.drop_last_extra_segment)
    train_dataloader = DataLoader(train_dataset,
                                  num_workers=param.n_processors,
                                  pin_memory=param.pin_mem,
                                  batch_sampler=train_random_sampler)
    valid_dataloader = DataLoader(valid_dataset,
                                  num_workers=param.n_processors,
                                  pin_memory=param.pin_mem,
                                  batch_sampler=valid_random_sampler)

    return train_dataset, train_dataloader, valid_dataset, valid_dataloader
Example #7
    def __getitem__(self, item: int):
        with ThreadingTimeout(3600.0) as timeout_ctx1:
            try:
                segment, image_sequence = super().__getitem__(item)
            except Exception as e:
                CometLogger.print(str(e))
                raise e
        if not bool(timeout_ctx1):
            CometLogger.fatalprint(
                'Encountered fatal delay while getting the image sequence')

        with ThreadingTimeout(3600.0) as timeout_ctx2:
            pose = self._get_segment_pose(segment)
        if not bool(timeout_ctx2):
            CometLogger.fatalprint(
                'Encountered fatal delay while getting the pose of the sequence'
            )

        return image_sequence, pose
Example #8
    def _log_epoch(self, custom_train_loss, custom_valid_loss, epoch,
                   epoch_run_time, train_benchmark_loss, valid_benchmark_loss):
        CometLogger.print("Epoch run time: {}".format(epoch_run_time))
        CometLogger.get_experiment().log_metric("epoch run time",
                                                epoch_run_time,
                                                epoch=epoch)
        CometLogger.get_experiment().log_metric("mean training loss",
                                                train_benchmark_loss,
                                                epoch=epoch)
        CometLogger.get_experiment().log_metric("mean validation loss",
                                                valid_benchmark_loss,
                                                epoch=epoch)
        CometLogger.get_experiment().log_metric("custom mean training loss",
                                                custom_train_loss,
                                                epoch=epoch)
        CometLogger.get_experiment().log_metric("custom mean validation loss",
                                                custom_valid_loss,
                                                epoch=epoch)
        CometLogger.print("Mean train loss: {}, mean valid loss: {}".format(
            custom_train_loss, custom_valid_loss))
        CometLogger.get_experiment().log_metric("epoch", epoch)
        CometLogger.get_experiment().log_epoch_end(epoch_cnt=epoch)
Example #9
    def _load_image_sequence(self, segment: AbstractSegment) -> torch.Tensor:
        cache_directory = self.dataset_directory + "/segment_image_tensor_cache"

        self._create_cache_dir(cache_directory)

        try:
            with ThreadingTimeout(2.0) as timeout_ctx1:
                images = torch.load("{}/{}.pkl".format(cache_directory, segment.__hash__()))

            if not bool(timeout_ctx1):
                CometLogger.print('Took too long when loading a cached image. '
                                  'We will load the image directly from the dataset instead.')
                raise Exception()
        except Exception:
            image_sequence = []

            with ThreadingTimeout(3600.0) as timeout_ctx2:
                for img_as_img in segment.get_images():
                    img_as_tensor = self.transformer(img_as_img)
                    if self.minus_point_5:
                        img_as_tensor = img_as_tensor - 0.5  # from [0, 1] -> [-0.5, 0.5]
                    img_as_tensor = self.normalizer(img_as_tensor)
                    img_as_tensor = img_as_tensor.unsqueeze(0)
                    image_sequence.append(img_as_tensor)
                images = torch.cat(image_sequence, 0)
            if not bool(timeout_ctx2):
                CometLogger.fatalprint('Encountered fatal delay when reading the uncached images from the dataset')

            free = -1
            try:
                with ThreadingTimeout(2.0) as timeout_ctx3:
                    _, _, free = shutil.disk_usage(cache_directory)
                if not bool(timeout_ctx3):
                    CometLogger.print('Took too long to measure disk space. Skipping caching.')

            except Exception as e:
                print("Warning: unable to cache the segment's image tensor, there was an error while getting "
                      "disk usage: {}".format(e), file=sys.stderr)

            # Only cache when the disk usage check succeeded and more than 1 GiB is free
            if free != -1 and free // (2**30) > 1:
                try:
                    with ThreadingTimeout(5.0) as timeout_ctx4:
                        torch.save(images, "{}/{}.pkl".format(cache_directory, segment.__hash__()))

                    if not bool(timeout_ctx4):
                        CometLogger.print('Took too long when saving to cache folder. Deadlock possible. Skipping caching.')

                except Exception as e:
                    print("Warning: unable to cache the segment's image tensor: {}".format(e), file=sys.stderr)

        if self.augment_dataset:
            images = self._augment_image_sequence(images)

        return images
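The caching guard above relies on shutil.disk_usage, which returns a (total, used, free) named tuple in bytes. A stripped-down sketch of the same guard (the directory and file name are illustrative):

import os
import shutil
import tempfile

cache_dir = tempfile.mkdtemp()
total, used, free = shutil.disk_usage(cache_dir)
# Mirror the free // (2**30) > 1 check: only cache with more than 1 GiB free
if free // (2**30) > 1:
    with open(os.path.join(cache_dir, "entry.bin"), "wb") as f:
        f.write(b"\x00" * 1024)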
Example #10
    def __init__(self,
                 model: nn.Module,
                 train_dataloader: DataLoader,
                 valid_dataloader,
                 optimizer: Optimizer,
                 loss: AbstractLoss,
                 early_stopping_patience=7,
                 model_backup_destination="./",
                 resume=False,
                 gradient_clipping_value=None):
        self.model: nn.Module = model
        self.train_dataloader: DataLoader = train_dataloader
        self.valid_dataloader: DataLoader = valid_dataloader
        self.optimizer: Optimizer = optimizer
        # Loss used only for benchmarking against other runs, in case the loss function from which backprop is computed changes
        self.benchmark_MSE_loss: AbstractLoss = BatchSegmentMSELoss()
        # Custom loss is used for backpropagating
        self.custom_loss: AbstractLoss = loss
        self.gradient_clipping_value = gradient_clipping_value
        self.model_backup_destination = self._get_backup_destination(
            model_backup_destination, model, train_dataloader, optimizer, loss)
        self.early_stopper = EarlyStopping(
            patience=early_stopping_patience,
            verbose=True,
            destination_path=self.model_backup_destination)

        if resume:
            CometLogger.print("Resuming the training of {}".format(
                self.model_backup_destination))
            CometLogger.print(
                "Overriding the Model and Optimizer's state dictionaries with the checkpoint's dicts"
            )
            self.model.load_state_dict(
                self.early_stopper.load_model_checkpoint())
            self.optimizer.load_state_dict(
                self.early_stopper.load_optimizer_checkpoint())
Example #11
def load_optimizer(param: Parameters, model: nn.Module) -> Optimizer:
    CometLogger.get_experiment().log_parameter("Optimizer", param.optimizer)
    CometLogger.get_experiment().log_parameter("Learning rate",
                                               param.learning_rate)

    if param.optimizer == "Adagrad":
        CometLogger.print("Using Adagrad")
        return optim.Adagrad(model.parameters(), lr=param.learning_rate)
    elif param.optimizer == "Adam":
        CometLogger.print("Using Adam Optimizer")
        return optim.Adam(model.parameters(), lr=param.learning_rate)
    elif param.optimizer == "RMSProp":
        CometLogger.print("Using RMSProp Optimizer")
        return optim.RMSprop(model.parameters(), lr=param.learning_rate)
    else:
        CometLogger.print("Optimizer {} was not implemented".format(
            param.optimizer))
        raise NotImplementedError()
Example #12
def launch_parallel_experiment(gpu_rank, api_key, experiment_keys,
                               experiment_params, repo_path):
    torch.cuda.set_device(gpu_rank)
    param = Parameters()
    param.segment_dataset = False
    param.model_backup_destination = param.model_backup_destination + "/process_{}".format(
        gpu_rank)
    experiment = ExistingExperiment(
        api_key=api_key,
        previous_experiment=experiment_keys[gpu_rank],
        log_env_details=True,
        log_env_gpu=True,
        log_env_cpu=True)
    experiment.params = experiment_params[gpu_rank]
    repo = Repo(repo_path)

    with CometLogger(experiment, gpu_id=gpu_rank, print_to_comet_only=True):
        setup_comet_experiment(experiment, param, repo)
        CometLogger.print("-> loading experiments assets:")
        loss, model, optimizer, train_dataloader, valid_dataloader = load_experiment_assets(
            param)

        if param.train:
            CometLogger.print("~~ Launching the training ~~")
            CometLogger.print(
                "Sleeping {} secs to reduce chances of deadlock.".format(
                    gpu_rank))
            sleep(gpu_rank)

            launch_training(model, train_dataloader, valid_dataloader,
                            optimizer, loss, param)
        if param.test:
            CometLogger.print("~~ Testing the model ~~")
            launch_testing(model, param)

    del train_dataloader, valid_dataloader, model, optimizer, loss
    torch.cuda.empty_cache()
Example #13
def load_dataset_dataloaders(param: Parameters) -> tuple:
    if param.dataset == "KITTI":
        CometLogger.print("Using dataset source: KITTI")
        train_dataset, train_dataloader, valid_dataset, valid_dataloader = _load_kitti(
            param)
    elif param.dataset == "MidAir":
        CometLogger.print("Using dataset source: MidAir")
        train_dataset, train_dataloader, valid_dataset, valid_dataloader = _load_midAir_dataset(
            param)
    elif param.dataset == "all":
        CometLogger.print("Using dataset source: All")
        train_dataset, train_dataloader, valid_dataset, valid_dataloader = _load_all_datasets(
            param)
    else:
        raise NotImplementedError()

    return train_dataloader, valid_dataloader
Example #14
    def run(self, epochs_number: int) -> nn.Module:
        for epoch in self._epochs(epochs_number):
            CometLogger.print("=========== Epoch {} ===========".format(epoch))
            t0 = time.time()
            custom_train_loss, train_benchmark_loss = self._train()
            custom_valid_loss, valid_benchmark_loss = self._validate()
            t1 = time.time()
            epoch_run_time = t1 - t0
            self._log_epoch(custom_train_loss, custom_valid_loss, epoch,
                            epoch_run_time, train_benchmark_loss,
                            valid_benchmark_loss)
            self.early_stopper(custom_valid_loss, self.model, self.optimizer)

            if self.early_stopper.early_stop:
                CometLogger.get_experiment().log_metric(
                    "Early stop epoch", epoch + 1)
                CometLogger.print("Early stopping")
                break

        CometLogger.print(
            "Training complete, loading the last early stopping checkpoint to memory..."
        )
        self.model.load_state_dict(self.early_stopper.load_model_checkpoint())
        return self.model
Example #15
    def run(self):
        trajectory_rotation_losses = []
        trajectory_translation_losses = []
        drift_errors = []
        ATEs = []
        REs = []

        for dataset_name, trajectory_name, dataloader in self.trajectory_dataloaders:
            dataset: AbstractSegmentDataset = dataloader.dataset
            print("testing {}, {}".format(trajectory_name, dataset_name))

            start = time.time()
            predictions, rotation_losses, translation_losses, absolute_ground_truth = self._test(
                dataloader)
            end = time.time()

            last_included_index = self._trim_trajectories(
                absolute_ground_truth[:, 3:])
            predictions = predictions[:last_included_index + 1]
            absolute_ground_truth = absolute_ground_truth[:last_included_index + 1]

            CometLogger.print(
                f"Inferred {len(predictions)} poses in {end-start} seconds.\n"
                f"Dataset fps: {dataset.framerate}, inference fps {len(predictions)/(end-start)}."
            )

            trajectory_rotation_losses.append(
                (dataset_name, trajectory_name, rotation_losses))
            trajectory_translation_losses.append(
                (dataset_name, trajectory_name, translation_losses))

            plotter = TrajectoryPlotter(trajectory_name, dataset_name,
                                        self.model_name, absolute_ground_truth,
                                        predictions)
            CometLogger.get_experiment().log_figure(
                figure=plotter.rotation_figure,
                figure_name='rotation {} {}'.format(trajectory_name,
                                                    dataset_name))

            CometLogger.get_experiment().log_figure(
                figure=plotter.position_figure,
                figure_name='translation {} {}'.format(trajectory_name,
                                                       dataset_name))

            drift, ATE, RE = self._log_metrics(absolute_ground_truth, dataset,
                                               dataset_name, predictions,
                                               trajectory_name)
            drift_errors.append(drift)
            ATEs.append(ATE)
            REs.append(RE)

            self._log_matrix_poses(predictions, absolute_ground_truth,
                                   dataset_name, trajectory_name)
            self._log_quaternion_poses(predictions, absolute_ground_truth,
                                       dataset_name, trajectory_name)

        self._log_compounded_metrics(ATEs, REs, drift_errors)

        losses_figure = self._plot_trajectory_losses(
            trajectory_rotation_losses, trajectory_translation_losses)
        CometLogger.get_experiment().log_figure(
            figure=losses_figure, figure_name="trajectory_losses")

        # compute total avg losses
        translation_loss = self._complute_total_avg_loss(
            trajectory_translation_losses)
        rotation_loss = self._complute_total_avg_loss(
            trajectory_rotation_losses)

        CometLogger.get_experiment().log_metric(
            "Total Avg Translation loss (test phase)", translation_loss)
        CometLogger.get_experiment().log_metric(
            "Total Avg Rotation loss (test phase)", rotation_loss)
Example #16
def load_model(param: Parameters) -> nn.Module:
    if param.model == "DeepVO":
        CometLogger.print("Using DeepVO")
        model = DeepVO(param.img_h,
                       param.img_w,
                       rnn_hidden_size=param.rnn_hidden_size)
    elif param.model == "CoordConvDeepVO":
        CometLogger.print("Using CoordConvDeepVO")
        model = CoordConvDeepVO(param.img_h,
                                param.img_w,
                                rnn_hidden_size=param.rnn_hidden_size)
    elif param.model == "MagicVO":
        CometLogger.print("Using MagicVO")
        model = MagicVO(param.img_h,
                        param.img_w,
                        rnn_hidden_size=param.rnn_hidden_size)
    elif param.model == "SelfAttentionVO":
        CometLogger.print("Using SelfAttentionVO")
        model = SelfAttentionVO(param.img_h,
                                param.img_w,
                                rnn_hidden_size=param.rnn_hidden_size)
    elif param.model == "SplitSelfAttentionVO":
        CometLogger.print("Using SplitSelfAttentionVO")
        model = SplitSelfAttentionVO(param.img_h,
                                     param.img_w,
                                     rnn_hidden_size=param.rnn_hidden_size)
    elif param.model == "CoordConvSelfAttentionVO":
        CometLogger.print("Using CoordConvSelfAttentionVO")
        model = CoordConvSelfAttentionVO(param.img_h,
                                         param.img_w,
                                         rnn_hidden_size=param.rnn_hidden_size)
    elif param.model == "SimpleSelfAttentionVO":
        CometLogger.print("Using SimpleSelfAttentionVO")
        model = SimpleSelfAttentionVO(param.img_h,
                                      param.img_w,
                                      rnn_hidden_size=param.rnn_hidden_size)
    elif param.model == "PositionalSimpleSelfAttentionVO":
        CometLogger.print("Using PositionalSimpleSelfAttentionVO")
        model = PositionalSimpleSelfAttentionVO(
            param.img_h, param.img_w, rnn_hidden_size=param.rnn_hidden_size)
    elif param.model == "SkippedSelfAttention":
        CometLogger.print("Using SkippedSelfAttention")
        model = SkippedSelfAttention(param.img_h,
                                     param.img_w,
                                     rnn_hidden_size=param.rnn_hidden_size)
    elif param.model == "WeightedSelfAttentionVO":
        CometLogger.print("Using WeightedSelfAttentionVO")
        model = WeightedSelfAttentionVO(param.img_h,
                                        param.img_w,
                                        rnn_hidden_size=param.rnn_hidden_size)
    elif param.model == "SelfAttentionVO_GlobRelOutput":
        CometLogger.print("Using SelfAttentionVO_GlobRelOutput")
        model = SelfAttentionVO_GlobRelOutput(
            param.img_h, param.img_w, rnn_hidden_size=param.rnn_hidden_size)
    elif param.model == "StackedSelfAttentionVO":
        CometLogger.print("Using StackedSelfAttentionVO")
        model = StackedSelfAttentionVO(param.img_h,
                                       param.img_w,
                                       rnn_hidden_size=param.rnn_hidden_size)
    elif param.model == "NoSelfAttentionVO":
        CometLogger.print("Using NoSelfAttentionVO")
        model = NoSelfAttentionVO(param.img_h,
                                  param.img_w,
                                  rnn_hidden_size=param.rnn_hidden_size)
    elif param.model == "SnailSelfAttentionVO":
        CometLogger.print("Using SnailSelfAttentionVO")
        model = SnailSelfAttentionVO(param.img_h,
                                     param.img_w,
                                     rnn_hidden_size=param.rnn_hidden_size)
    elif param.model == "SnailVO":
        CometLogger.print("Using SnailSelfAttentionVO")
        model = SnailVO(param.img_h, param.img_w, 5)
    elif param.model == "GlobalRelativeSelfAttentionVO":
        CometLogger.print("Using GlobalRelativeSelfAttentionVO")
        model = GlobalRelativeSelfAttentionVO(
            param.img_h, param.img_w, rnn_hidden_size=param.rnn_hidden_size)
    elif param.model == "GlobalRelativeTransformerVO":
        CometLogger.print("Using GlobalRelativeTransformerVO")
        model = GlobalRelativeTransformerVO(
            param.img_h, param.img_w, rnn_hidden_size=param.rnn_hidden_size)
    elif param.model == "GlobalRelativeTransformerVO_globXAsKeyVal":
        CometLogger.print("Using GlobalRelativeTransformerVO_globXAsKeyVal")
        model = GlobalRelativeTransformerVO_globXAsKeyVal(
            param.img_h, param.img_w, rnn_hidden_size=param.rnn_hidden_size)
    elif param.model == "GlobalRelativeSelfAttentionVO_globXasKeyVal":
        CometLogger.print("Using GlobalRelativeSelfAttentionVO_globXasKeyVal")
        model = GlobalRelativeSelfAttentionVO_globXasKeyVal(
            param.img_h, param.img_w, rnn_hidden_size=param.rnn_hidden_size)
    else:
        CometLogger.print("{} was not implemented".format(param.model))
        raise NotImplementedError()

    _map_pretrained_model_to_current_model(param.pretrained_model, model)

    if cuda_is_available():
        CometLogger.print("Training with CUDA")
        model.cuda()
    else:
        CometLogger.print("CUDA not available. Training on the CPU.")

    return model
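The long if/elif chain maps a string to a model class by hand; the same dispatch can be written as a lookup table. A sketch under the project's imports (the registry and helper names are hypothetical, and SnailVO, whose constructor takes a different third argument, would still need its own entry):

# Hypothetical registry mapping param.model strings to model classes
MODEL_REGISTRY = {
    "DeepVO": DeepVO,
    "MagicVO": MagicVO,
    "SelfAttentionVO": SelfAttentionVO,
    # ... remaining classes registered the same way
}

def _build_model(param: Parameters) -> nn.Module:  # hypothetical helper
    try:
        model_class = MODEL_REGISTRY[param.model]
    except KeyError:
        CometLogger.print("{} was not implemented".format(param.model))
        raise NotImplementedError()
    CometLogger.print("Using {}".format(param.model))
    return model_class(param.img_h,
                       param.img_w,
                       rnn_hidden_size=param.rnn_hidden_size)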