Example #1

def setUp(self) -> None:
     torch.autograd.set_detect_anomaly(True)
     current_folder = os.path.dirname(os.path.abspath(__file__))
     dataset_folder = os.path.join(current_folder, "data",
                                   "test_feature_extractor")
     self._data_module = DataModuleMock(
         DescriptorDataModule(dataset_folder, dataset_folder))
     parameters = AttributeDict(
         name="BinarizationPointNetwork",
         optimizer=AttributeDict(),
         classifier=AttributeDict(name="SimpleModel",
                                  input_dimension=512,
                                  hidden_dimensions=(128, ),
                                  output_dimension=64),
         regressor=AttributeDict(name="SimpleModel",
                                 input_dimension=64,
                                 hidden_dimensions=(64, ),
                                 output_dimension=3),
         criterion=AttributeDict(name="RegressionLoss"),
         metric_logging_frequency=1.)
     self._trainer = pl.Trainer(logger=TensorBoardLogger("lightning_logs"),
                                max_epochs=1,
                                gpus=1)
     factory = UniversalFactory(
         [RegressionLoss, SimpleModel, BinarizationPointNetwork])
     self._model = factory.make_from_parameters(parameters)
Example #2

 def setUp(self) -> None:
     torch.autograd.set_detect_anomaly(True)
     current_folder = os.path.dirname(os.path.abspath(__file__))
     dataset_folder = os.path.join(current_folder, "datasets", "toy_dataset", "dataset.npz")
     self._data_module = ToyDataModule(dataset_folder)
     params = AttributeDict(
         model_name="pose_mvae",
         optimizer=AttributeDict(),
         image_encoder=AttributeDict(
             hidden_dimensions=[16, 32, 64],
             attention=True,
         ),
         pose_encoder=AttributeDict(
             hidden_dimensions=[256, 256],
             attention=True,
             constant_logvar=True,
             activation_type="swish",
         ),
         latent_dimension=128,
         beta=1,
         gamma=0,
         pose_distribution="se2",
         pose_augmentation=False,
         separate_elbo=True,
         delta_position=1,
         delta_angle=1,
     )
     self._model = PoseMVAEFactory().make_model(params)
Example #3
 def setUp(self) -> None:
     torch.autograd.set_detect_anomaly(True)
     current_folder = os.path.dirname(os.path.abspath(__file__))
     dataset_path = os.path.join(current_folder, "datasets", "toy_dataset",
                                 "three_point_dataset.npz")
     self._data_module = ToyDataModule(dataset_path,
                                       rotation_augmentation=False)
     params = AttributeDict(
         optimizer=AttributeDict(),
         image_encoder=AttributeDict(
             hidden_dimensions=[16, 32, 64],
             attention=True,
         ),
         pose_encoder=AttributeDict(
             hidden_dimensions=[256, 256],
             attention=True,
             constant_logvar=False,
             activation_type="swish",
         ),
         latent_dimension=256,
         beta=1,
         gamma=0,
         pose_distribution="se2",
         pose_augmentation=False,
         separate_elbo=False,
         delta_position=1,
         delta_angle=1,
     )
     self._model = PoseMVAEFactory().make_pose_net(params)
     data = np.load(dataset_path, allow_pickle=True)["arr_0"]
     centers = data.item()["point_centers"]
     colors = data.item()["point_colors"]
     self._model.set_points_information(centers, colors, ((0, 4), (0, 4)),
                                        3.2, 0.1, 0.6)
Example #4
    def training_step(self, split_batch, batch_idx, opt_idx, hiddens):
        with self.trainer.profiler.profile('model_forward'):
            args = self.build_train_args(split_batch, batch_idx, opt_idx, hiddens)
            training_step_output = self.trainer.accelerator_backend.training_step(args)
            training_step_output = self.trainer.call_hook('training_step_end', training_step_output)

            # ----------------------------
            # PROCESS THE RESULT
            # ----------------------------
            # format and reduce outputs accordingly
            training_step_output_for_epoch_end = training_step_output
            is_result_obj = isinstance(training_step_output, Result)

            # track batch size for weighted average
            if is_result_obj:
                training_step_output.track_batch_size(len(split_batch))

            # don't allow EvalResult in the training_step
            if isinstance(training_step_output, EvalResult):
                raise MisconfigurationException('training_step cannot return EvalResult, '
                                                'use a dict or TrainResult instead')

            # handle regular dicts
            if not is_result_obj:
                training_step_output = self.trainer.process_dict_result(training_step_output, train=True)

                training_step_output = AttributeDict(
                    batch_loss=training_step_output[0],
                    pbar_on_batch_end=training_step_output[1],
                    log_metrics=training_step_output[2],
                    callback_metrics=training_step_output[3],
                    hiddens=training_step_output[4],
                )

            # if the user decides to finally reduce things in epoch_end, save raw output without graphs
            if isinstance(training_step_output_for_epoch_end, torch.Tensor):
                training_step_output_for_epoch_end = training_step_output_for_epoch_end.detach()
            elif is_result_obj:
                training_step_output_for_epoch_end = copy(training_step_output)
                training_step_output_for_epoch_end.detach()
            else:
                training_step_output_for_epoch_end = recursive_detach(training_step_output_for_epoch_end)

        # accumulate loss
        # (if accumulate_grad_batches = 1 no effect)
        closure_loss = training_step_output.minimize if is_result_obj else training_step_output.batch_loss
        closure_loss = closure_loss / self.trainer.accumulate_grad_batches

        # the loss will get scaled for amp. avoid any modifications to it
        untouched_loss = closure_loss.detach().clone()

        # result
        result = AttributeDict(
            closure_loss=closure_loss,
            loss=untouched_loss,
            training_step_output=training_step_output,
            training_step_output_for_epoch_end=training_step_output_for_epoch_end,
            hiddens=training_step_output.hiddens,
        )
        return result
Example #5
def attributedict(dict_like: DictLike) -> AttributeDict:
    """If given a dict, it is converted it to an argparse.AttributeDict. Otherwise, no change is made"""
    if isinstance(dict_like, AttributeDict):
        return dict_like
    elif isinstance(dict_like, Namespace):
        return AttributeDict(vars(dict_like))
    elif isinstance(dict_like, dict):
        return AttributeDict(**dict_like)

    raise ValueError(
        f"Unable to convert type {type(dict_like)} to AttributeDict")
Example #6
 def setUp(self) -> None:
     current_folder = os.path.dirname(os.path.abspath(__file__))
     dataset_folder = os.path.join(current_folder, "datasets",
                                   "toy_dataset", "dataset.npz")
     self._data_module = ToyDataModule(dataset_folder)
     params = AttributeDict(
         optimizer=AttributeDict(),
         encoder=AttributeDict(hidden_dimensions=[64, 128]),
         latent_dimension=20,
         beta=1,
         gamma=0,
     )
     self._model = VAE(params)
Example #7

 def setUp(self) -> None:
     torch.autograd.set_detect_anomaly(True)
     current_folder = os.path.dirname(os.path.abspath(__file__))
     dataset_folder = os.path.join(current_folder, "data", "test_feature_extractor")
     self._data_module = DataModuleMock(DescriptorDataModule(dataset_folder, dataset_folder))
     self._params = AttributeDict(
         name="point_net",
         optimizer=AttributeDict(),
         hidden_dimensions=(100,),
         input_dimension=512,
     )
     self._trainer = pl.Trainer(logger=TensorBoardLogger("lightning_logs"), max_epochs=1, gpus=1)
     self._criterion = RGBandModelReprojectionLoss()
     self._model = PointNetwork(self._params, self._criterion)
Example #8
 def setup(self, config, data):
     self.config = AttributeDict(config)
     self.datasets = data['datasets']
     self.word_dict = data['word_dict']
     self.classes = data['classes']
     self.device = init_device(config.cpu)
     set_seed(seed=self.config.seed)
Example #9

 def __init__(self, trainer, cluster_environment=None):
     self.trainer = trainer
     self.cluster_environment = cluster_environment
     self.dist = AttributeDict(rank=0, device=None)
     self.train_loop = self.trainer.train
     self.validation_loop = self.trainer.run_evaluation
     self.test_loop = self.trainer.run_evaluation
Example #10
    def test_load_undeepvo_checkpoint(self):
        filename = 'checkpoint_undeepvo.pth'
        # subprocess.run('checkpoint_download.sh')

        params = AttributeDict(
            lr=1e-4,
            beta1=0.9,
            beta2=0.99,
            lambda_position=0.01,
            lambda_rotation=0.1,
            batch_size=8,
        )

        pose_net = PoseNetResNet()
        depth_net = DepthNetResNet()

        params.update(scale_lr=5e-1,
                      initial_log_scale=4.59,
                      initial_log_min_depth=0.)

        model = ScaledUnsupervisedDepthModel(params,
                                             pose_net,
                                             depth_net,
                                             criterion=None)

        model_before = copy.deepcopy(model)
        load_undeepvo_checkpoint(model, filename)
        self.assertTrue(
            torch.any(model_before._pose_net._first_layer.weight !=
                      model._pose_net._first_layer.weight))
        self.assertTrue(
            torch.any(model_before._depth_net.skip_zero.weight !=
                      model._depth_net.skip_zero.weight))
Example #11
    def setUp(self) -> None:
        current_folder = os.path.dirname(os.path.abspath(__file__))
        dataset_folder = os.path.join(
            os.path.dirname(current_folder), "datasets", "tum_rgbd",
            "rgbd_dataset_freiburg3_large_cabinet_validation")
        data_module_factory = TumVideoDataModuleFactory(dataset_folder)
        self._data_module = data_module_factory.make_data_module(
            final_image_size=(128, 384),
            transform_manager_parameters={"filters": True},
            batch_size=1,
            num_workers=WORKERS_COUNT,
            split=(0.8, 0.1, 0.1),
            device="cuda:0")
        self._data_module = DataModuleMock(self._data_module)

        pose_net = PoseNetResNet()
        depth_net = DepthNetResNet()
        criterion = MonoUnsupervisedCriterion(
            self._data_module.get_cameras_calibration(), 1, 1)

        params = AttributeDict(lr=1e-3, beta1=0.99, beta2=0.9)
        self._model = UnsupervisedDepthModel(params,
                                             pose_net,
                                             depth_net,
                                             criterion,
                                             stereo=False,
                                             mono=True).cuda()
Example #12
def make_attribute_dict(dictionary):
    if type(dictionary) != dict:
        return dictionary
    new_dictionary = {}
    for key, value in dictionary.items():
        new_dictionary[key] = make_attribute_dict(value)
    return AttributeDict(**new_dictionary)
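A short sketch of what the recursive helper above produces; the nested keys are invented for illustration:

nested = {"optimizer": {"lr": 1e-3, "betas": (0.9, 0.99)}, "latent_dimension": 20}
params = make_attribute_dict(nested)
assert isinstance(params.optimizer, AttributeDict)  # inner dicts are converted too
assert params.optimizer.lr == 1e-3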
Example #13

def test_attribute_dict(tmpdir):
    # Test initialization
    inputs = {"key1": 1, "key2": "abc"}
    ad = AttributeDict(inputs)
    for key, value in inputs.items():
        assert getattr(ad, key) == value

    # Test adding new items
    ad = AttributeDict()
    ad.update({"key1": 1})
    assert ad.key1 == 1

    # Test updating existing items
    ad = AttributeDict({"key1": 1})
    ad.key1 = 123
    assert ad.key1 == 123
Example #14
    def test_unfreeze_last_layer(self):
        params = AttributeDict(
            lr=1e-4,
            beta1=0.9,
            beta2=0.99,
            lambda_position=0.01,
            lambda_rotation=0.1,
            batch_size=8,
        )

        pose_net = PoseNetResNet()
        depth_net = DepthNetResNet()

        params.update(scale_lr=5e-1,
                      initial_log_scale=4.59,
                      initial_log_min_depth=0.)

        model = ScaledUnsupervisedDepthModel(params,
                                             pose_net,
                                             depth_net,
                                             criterion=None)

        freeze_feature_extractor(model)
        unfreeze_last_layer(model)

        self.assertTrue(model._depth_net._last_conv.requires_grad)
        self.assertTrue(model._pose_net.rot3.requires_grad)
        self.assertTrue(model._pose_net.transl3.requires_grad)
Example #15
    def __init__(
        self,
        memory_utilization: bool = True,
        gpu_utilization: bool = True,
        intra_step_time: bool = False,
        inter_step_time: bool = False,
        fan_speed: bool = False,
        temperature: bool = False,
    ):
        super().__init__()

        rank_zero_deprecation(
            "The `GPUStatsMonitor` callback was deprecated in v1.5 and will be removed in v1.7."
            " Please use the `DeviceStatsMonitor` callback instead.")

        if shutil.which("nvidia-smi") is None:
            raise MisconfigurationException(
                "Cannot use GPUStatsMonitor callback because NVIDIA driver is not installed."
            )

        self._log_stats = AttributeDict({
            "memory_utilization": memory_utilization,
            "gpu_utilization": gpu_utilization,
            "intra_step_time": intra_step_time,
            "inter_step_time": inter_step_time,
            "fan_speed": fan_speed,
            "temperature": temperature,
        })

        # The logical device IDs for selected devices
        self._device_ids: List[int] = []  # will be assigned later in setup()

        # The unmasked real GPU IDs
        self._gpu_ids: List[str] = []  # will be assigned later in setup()
Example #16

    def __init__(self,
                 memory_utilization: bool = True,
                 gpu_utilization: bool = True,
                 intra_step_time: bool = False,
                 inter_step_time: bool = False,
                 fan_speed: bool = False,
                 temperature: bool = False):
        super().__init__()

        if shutil.which('nvidia-smi') is None:
            raise MisconfigurationException(
                'Cannot use GPUStatsMonitor callback because NVIDIA driver is not installed.'
            )

        self._log_stats = AttributeDict({
            'memory_utilization': memory_utilization,
            'gpu_utilization': gpu_utilization,
            'intra_step_time': intra_step_time,
            'inter_step_time': inter_step_time,
            'fan_speed': fan_speed,
            'temperature': temperature
        })

        # The logical device IDs for selected devices
        self._device_ids: List[int] = []  # will be assigned later in setup()

        # The unmasked real GPU IDs
        self._gpu_ids: List[str] = []  # will be assigned later in setup()
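For context, a hedged sketch of how a GPUStatsMonitor like the ones above is typically attached to a trainer; the gpus=1 setting mirrors the other examples on this page:

import pytorch_lightning as pl

monitor = GPUStatsMonitor(intra_step_time=True, inter_step_time=True)
trainer = pl.Trainer(gpus=1, callbacks=[monitor])  # stats are logged during fit()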
Example #17
    def setUp(self) -> None:
        current_folder = os.path.dirname(os.path.abspath(__file__))
        dataset_folder = os.path.join(os.path.dirname(current_folder),
                                      "datasets", "kitti")
        data_module_factory = KittiDataModuleFactory(range(0, 301, 1),
                                                     directory=dataset_folder)
        self._data_module = data_module_factory.make_dataset_manager(
            final_image_size=(128, 384),
            transform_manager_parameters={"filters": True},
            batch_size=1,
            num_workers=WORKERS_COUNT,
            split=(0.8, 0.1, 0.1))
        self._data_module = DataModuleMock(self._data_module)

        pose_net = PoseNetResNet()
        depth_net = DepthNetResNet()
        criterion = UnsupervisedCriterion(
            self._data_module.get_cameras_calibration(), 1, 1)

        params = AttributeDict(
            lr=1e-3,
            beta1=0.99,
            beta2=0.9,
            scale_lr=1e-3,
            initial_log_scale=0.,
            initial_log_min_depth=0.,
            initial_log_pose_scale=0.,
        )
        self._model = ScaledUnsupervisedDepthModel(params, pose_net, depth_net,
                                                   criterion).cuda()
Example #18
    def training_step(self, split_batch, batch_idx, opt_idx, hiddens):
        # give the PL module a result for logging
        model_ref = self.trainer.lightning_module

        with self.trainer.profiler.profile("model_forward"):
            step_kwargs = self._build_kwargs(split_batch, batch_idx, opt_idx, hiddens)

            # manually capture logged metrics
            model_ref._current_fx_name = 'training_step'
            with self.trainer.profiler.profile("training_step"):
                training_step_output = self.trainer.accelerator.training_step(step_kwargs)
                self.trainer.accelerator.post_training_step()

            training_step_output = self.trainer.call_hook("training_step_end", training_step_output)

            self._check_training_step_output(training_step_output)

            training_step_output = self._process_training_step_output(training_step_output)
            if training_step_output is None:
                return

        closure_loss = None
        loss = None
        if self.trainer.lightning_module.automatic_optimization:
            # accumulate loss. if accumulate_grad_batches==1, no effect
            closure_loss = training_step_output.minimize / self.trainer.accumulate_grad_batches
            # the loss will get scaled for amp. avoid any modifications to it
            loss = closure_loss.detach().clone()
        return AttributeDict(closure_loss=closure_loss, loss=loss, training_step_output=training_step_output)
Example #19
    def _process_training_step_output(self, training_step_output, split_batch):
        training_step_output_for_epoch_end = training_step_output

        # enable validation_step return None
        if training_step_output_for_epoch_end is None:
            return None, None

        # -----------------------------------------
        # process hybrid (1.0)
        # -----------------------------------------
        # no need for these checks in 1.0.0
        # TODO: remove checks in 1.0.0
        is_tensor = isinstance(training_step_output_for_epoch_end, torch.Tensor)
        is_1_0_output = is_tensor or ("log" not in training_step_output and "progress_bar" not in training_step_output)
        if is_1_0_output:
            return self._process_training_step_output_1_0(training_step_output, split_batch)

        # -----------------------------------------
        # process old dict (deprecate 1.0)
        # -----------------------------------------
        training_step_output = self.trainer.process_dict_result(training_step_output, train=True)

        training_step_output = AttributeDict(
            batch_loss=training_step_output[0],
            pbar_on_batch_end=training_step_output[1],
            log_metrics=training_step_output[2],
            hiddens=training_step_output[3],
        )
        # if the user decides to finally reduce things in epoch_end, save raw output without graphs
        if isinstance(training_step_output_for_epoch_end, torch.Tensor):
            training_step_output_for_epoch_end = training_step_output_for_epoch_end.detach()
        else:
            training_step_output_for_epoch_end = recursive_detach(training_step_output_for_epoch_end)

        return training_step_output_for_epoch_end, training_step_output
Example #20
    def run_training_batch(self, batch, batch_idx, dataloader_idx):
        # bookkeeping
        self._hiddens = None

        optimizers = list(enumerate(self.trainer.optimizers))

        # track all outputs across time and num of optimizers
        batch_outputs = [[] for _ in range(len(optimizers))]

        if batch is None:
            self.warning_cache.warn("train_dataloader yielded None. If this was on purpose, ignore this warning...")
            return AttributeDict(signal=0, training_step_output=batch_outputs)

        # hook
        self.trainer.logger_connector.on_batch_start()
        response = self.trainer.call_hook("on_batch_start")
        if response == -1:
            return AttributeDict(signal=-1)

        # hook
        response = self.trainer.call_hook("on_train_batch_start", batch, batch_idx, dataloader_idx)
        if response == -1:
            return AttributeDict(signal=-1)

        # lightning module hook
        splits = self._tbptt_split_batch(batch)

        for split_idx, split_batch in enumerate(splits):
            self.split_idx = split_idx

            # let logger connector extract batch size
            self.trainer.logger_connector.on_train_split_start(batch_idx, split_idx, split_batch)

            if self.trainer.lightning_module.automatic_optimization:
                for opt_idx, optimizer in self.get_active_optimizers(batch_idx):
                    result = self._run_optimization(batch_idx, split_batch, opt_idx, optimizer)
                    if result:
                        batch_outputs[opt_idx].append(result.training_step_output)
            else:
                # in manual optimization, there is no looping over optimizers
                result = self._run_optimization(batch_idx, split_batch)
                if result:
                    batch_outputs[0].append(result.training_step_output)

        return AttributeDict(signal=0, training_step_output=batch_outputs)
Example #21
 def setUp(self) -> None:
     current_folder = os.path.dirname(os.path.abspath(__file__))
     dataset_folder = os.path.join(current_folder, "datasets", "toy_dataset", "ball_dataset.npz")
     self._data_module = ToyBallDataModule(dataset_folder)
     params = AttributeDict(
         optimizer=AttributeDict(),
         image_encoder=AttributeDict(
             hidden_dimensions=[64, 128]
         ),
         pose_encoder=AttributeDict(
             hidden_dimensions=[32, 32],
             constant_logvar=True
         ),
         latent_dimension=20,
         beta=1,
         gamma=0,
     )
     self._model = PoseMVAEFactory().make_ball_pose_mvae_model(params)
Example #22
    def training_step(self, split_batch, batch_idx, opt_idx, hiddens):
        # give the PL module a result for logging
        model_ref = self.trainer.lightning_module

        with self.trainer.profiler.profile("model_forward"):
            args = self.build_train_args(split_batch, batch_idx, opt_idx,
                                         hiddens)

            # manually capture logged metrics
            model_ref._current_fx_name = 'training_step'
            model_ref._results = Result()
            with self.trainer.profiler.profile("training_step"):
                training_step_output = self.trainer.accelerator.training_step(
                    args)
                self.trainer.accelerator.post_training_step()

            self.trainer.logger_connector.cache_logged_metrics()

            self._check_training_step_output(training_step_output)

            training_step_output = self.trainer.call_hook(
                "training_step_end", training_step_output)

            training_step_output_for_epoch_end, training_step_output = self._process_training_step_output(
                training_step_output, split_batch)
            is_result_obj = isinstance(training_step_output, Result)

            if training_step_output_for_epoch_end is None:
                return None

        # enable empty loss when using manual opt
        closure_loss = None
        untouched_loss = None

        if self.automatic_optimization:
            # accumulate loss
            # (if accumulate_grad_batches = 1 no effect)
            if is_result_obj:
                closure_loss = training_step_output.minimize
            else:
                closure_loss = training_step_output.batch_loss

            closure_loss = closure_loss / self.trainer.accumulate_grad_batches

            # the loss will get scaled for amp. avoid any modifications to it
            untouched_loss = closure_loss.detach().clone()

        # result
        result = AttributeDict(
            closure_loss=closure_loss,
            loss=untouched_loss,
            training_step_output=training_step_output,
            training_step_output_for_epoch_end=training_step_output_for_epoch_end,
            hiddens=training_step_output.hiddens,
        )
        return result
Example #23
 def setUp(self) -> None:
     torch.autograd.set_detect_anomaly(True)
     dataset_folder = "/media/mikhail/Data3T/7scenes"
     self._data_module = DataModuleMock(
         SevenScenesDataModule("chess", dataset_folder, 2, 4))
     self._params = AttributeDict(name="PoseNet",
                                  optimizer=AttributeDict(),
                                  feature_extractor=AttributeDict(
                                      pretrained=True, ),
                                  criterion=AttributeDict(
                                      name="PoseNetCriterion",
                                      lr=0.1,
                                  ),
                                  feature_dimension=2048,
                                  drop_rate=0.5,
                                  bias=True,
                                  activation="relu")
     self._trainer = pl.Trainer(max_epochs=1, gpus=1)
     self._factory = UniversalFactory(
         [PoseNet, PoseNetCriterion, SimpleSE3Criterion, SE3Criterion])
Example #24
    def __init__(self, trainer=None, cluster_environment=None, ddp_plugin=None):
        self.trainer = trainer
        self.nickname = None
        self.cluster_environment = cluster_environment
        self.dist = AttributeDict(rank=0, device=None)
        self.ddp_plugin = ddp_plugin

        if trainer is not None:
            self.train_loop = self.trainer.train
            self.validation_loop = self.trainer.run_evaluation
            self.test_loop = self.trainer.run_evaluation
Example #25
 def setUp(self) -> None:
     torch.autograd.set_detect_anomaly(True)
     current_folder = os.path.dirname(os.path.abspath(__file__))
     dataset_folder = "/media/mikhail/Data3T/7scenes"
     self._data_module = DataModuleMock(
         SevenScenesDataModule("chess", dataset_folder, 2, 4))
     self._params = AttributeDict(name="pose_net",
                                  optimizer=AttributeDict(),
                                  feature_extractor=AttributeDict(
                                      name="resnet34",
                                      pretrained=True,
                                  ),
                                  criterion=AttributeDict(
                                      name="pose_net_criterion", ),
                                  feature_dimension=2048,
                                  drop_rate=0.5,
                                  bias=True,
                                  activation="relu")
     self._trainer = pl.Trainer(logger=TensorBoardLogger("lightning_logs"),
                                max_epochs=1,
                                gpus=1)
Example #26
    def _run_optimization(self,
                          batch_idx,
                          split_idx,
                          split_batch,
                          opt_idx=0,
                          optimizer=None):
        # TODO: In v1.5, when optimizer_idx gets removed from training_step in manual_optimization, change
        #   opt_idx=0 to opt_idx=None in the signature here

        # toggle model params + set info to logger_connector
        self.run_train_split_start(split_idx, split_batch, opt_idx, optimizer)

        result = AttributeDict()
        closure = self.make_closure(split_batch, batch_idx, opt_idx, optimizer,
                                    self._hiddens, result)

        if self.should_accumulate():
            # For gradient accumulation

            # -------------------
            # calculate loss (train step + train step end)
            # -------------------
            # automatic_optimization=True: perform ddp sync only when performing optimizer_step
            # automatic_optimization=False: don't block synchronization here
            with self.block_ddp_sync_behaviour():
                closure()

        # ------------------------------
        # BACKWARD PASS
        # ------------------------------
        # gradient update with accumulated gradients
        else:
            if self.trainer.lightning_module.automatic_optimization:
                self.optimizer_step(optimizer, opt_idx, batch_idx, closure)
                if len(self.trainer.optimizers) > 1:
                    # revert back to previous state
                    self.trainer.lightning_module.untoggle_optimizer(opt_idx)
            else:
                result = self.training_step(split_batch, batch_idx, opt_idx,
                                            self._hiddens)

            if not result:
                # user decided to skip optimization
                return result

            # update running loss + reset accumulated loss
            self.update_running_loss(result.loss)

        self._process_closure_result(result)
        return result
Example #27
def init_model_config(config_path):
    with open(config_path) as fp:
        args = yaml.load(fp, Loader=yaml.SafeLoader)

    # create directories that hold the shared data
    os.makedirs(args['result_dir'], exist_ok=True)
    if args['embed_cache_dir']:
        os.makedirs(args['embed_cache_dir'], exist_ok=True)

    # set relative path to absolute path (_path, _file, _dir)
    for k, v in args.items():
        if isinstance(v, str) and os.path.exists(v):
            args[k] = os.path.abspath(v)

    model_config = AttributeDict(args)
    set_seed(seed=model_config.seed)
    return model_config
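A minimal usage sketch for the loader above; result_dir, embed_cache_dir, and seed are the only keys the function itself reads, and the file path is hypothetical:

# assuming config/example.yaml contains at least:
#   result_dir: ./results
#   embed_cache_dir: ./cache
#   seed: 42
model_config = init_model_config("config/example.yaml")
print(model_config.result_dir, model_config.seed)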
Example #28

    def training_step(self, split_batch, batch_idx, opt_idx, hiddens):
        # give the PL module a result for logging
        model = self.trainer.get_model()
        model._results = Result()
        model._current_fx_name = 'training_step'

        with self.trainer.profiler.profile('model_forward'):
            args = self.build_train_args(split_batch, batch_idx, opt_idx,
                                         hiddens)
            training_step_output = self.trainer.accelerator_backend.training_step(
                args)
            training_step_output = self.trainer.call_hook(
                'training_step_end', training_step_output)

            training_step_output_for_epoch_end, training_step_output = self._process_training_step_output(
                training_step_output, split_batch)
            is_result_obj = isinstance(training_step_output, Result)

            if training_step_output_for_epoch_end is None:
                return None

        # accumulate loss
        # (if accumulate_grad_batches = 1 no effect)
        if is_result_obj:
            closure_loss = training_step_output.minimize
        else:
            closure_loss = training_step_output.batch_loss

        closure_loss = closure_loss / self.trainer.accumulate_grad_batches

        # the loss will get scaled for amp. avoid any modifications to it
        untouched_loss = closure_loss.detach().clone()

        # result
        result = AttributeDict(
            closure_loss=closure_loss,
            loss=untouched_loss,
            training_step_output=training_step_output,
            training_step_output_for_epoch_end=training_step_output_for_epoch_end,
            hiddens=training_step_output.hiddens,
        )
        return result
Example #29
    def __init__(self,
                 memory_utilization: bool = True,
                 gpu_utilization: bool = True,
                 intra_step_time: bool = False,
                 inter_step_time: bool = False,
                 fan_speed: bool = False,
                 temperature: bool = False):
        super().__init__()

        if shutil.which('nvidia-smi') is None:
            raise MisconfigurationException(
                'Cannot use GPUStatsMonitor callback because NVIDIA driver is not installed.'
            )

        self._log_stats = AttributeDict({
            'memory_utilization': memory_utilization,
            'gpu_utilization': gpu_utilization,
            'intra_step_time': intra_step_time,
            'inter_step_time': inter_step_time,
            'fan_speed': fan_speed,
            'temperature': temperature
        })
Example #30
    def setUp(self) -> None:
        params = AttributeDict(image_size=(128, 384),
                               batch_size=1,
                               transform_filters=True,
                               split=(0.8, 0.1, 0.1),
                               num_workers=WORKERS_COUNT,
                               detach=True,
                               levels=(1, ),
                               inner_lambda_s=0.15,
                               lr=1e-3,
                               beta1=0.99,
                               beta2=0.9)
        current_folder = os.path.dirname(os.path.abspath(__file__))
        dataset_folder = os.path.join(os.path.dirname(current_folder),
                                      "datasets", "kitti")
        data_module_factory = KittiDataModuleFactory(range(0, 301, 1),
                                                     directory=dataset_folder)
        self._data_module = data_module_factory.make_data_module_from_params(
            params)
        self._data_module = DataModuleMock(self._data_module)

        self._model = MultiUnsupervisedDepthModelFactory().make_model(
            params, self._data_module.get_cameras_calibration())