def run(self, batch: Any, batch_idx: int, dataloader_idx: int) -> AttributeDict:
    """Fire the batch-start hooks and then run the loop body for one batch.

    Args:
        batch: the current batch to run the train step on
        batch_idx: the index of the current batch
        dataloader_idx: the index of the dataloader producing the current batch

    Returns:
        An ``AttributeDict`` carrying ``signal``. ``signal`` is ``-1`` when the
        ``on_batch_start`` or ``on_train_batch_start`` hook returned ``-1``;
        otherwise it is ``0`` and ``training_step_output`` is included as well.
    """
    if batch is None:
        self._warning_cache.warn("train_dataloader yielded None. If this was on purpose, ignore this warning...")
        return AttributeDict(signal=0, training_step_output=[[]])

    trainer = self.trainer

    # generic batch-start hook; a return value of -1 aborts the batch
    trainer.logger_connector.on_batch_start()
    if trainer.call_hook("on_batch_start") == -1:
        return AttributeDict(signal=-1)

    # train-specific batch-start hook; same -1 convention
    if trainer.call_hook("on_train_batch_start", batch, batch_idx, dataloader_idx) == -1:
        return AttributeDict(signal=-1)

    trainer.fit_loop.epoch_loop.batch_progress.increment_started()

    super().run(batch, batch_idx, dataloader_idx)

    collected_outputs = self.batch_outputs
    self.batch_outputs = None  # free memory
    return AttributeDict(signal=0, training_step_output=collected_outputs)
def _training_step_and_backward_closure(
    self,
    split_batch: Any,
    batch_idx: int,
    opt_idx: int,
    optimizer: Optimizer,
    hiddens: Tensor,
    return_result: AttributeDict,
) -> Optional[Tensor]:
    """Closure body: run training step + backward and publish the result.

    Args:
        split_batch: the current tbptt split of the batch
        batch_idx: the index of the current batch
        opt_idx: the index of the current optimizer
        optimizer: the current optimizer
        hiddens: the hidden state of the recurrent net
        return_result: the storage of the trainstep results; updated in place
            when ``training_step_and_backward`` yields a result

    Returns:
        ``return_result.loss`` after the update, or ``None`` when
        ``training_step_and_backward`` returned ``None``.
    """
    step_result = self.training_step_and_backward(split_batch, batch_idx, opt_idx, optimizer, hiddens)
    # NOTE(review): the flattened source does not show whether the final
    # return sat inside the ``is not None`` branch; the ``Optional`` return
    # annotation suggests it did - confirm against the upstream file.
    if step_result is None:
        return None
    return_result.update(step_result)
    return return_result.loss
def test_hyperparameters_saving():
    """Check ``hparams`` of the data modules for each supported way of passing hyperparameters."""
    # plain positional / keyword arguments
    from_args = DataModuleWithHparams_0(10, "foo", kwarg0="bar")
    assert from_args.hparams == AttributeDict({"arg0": 10, "arg1": "foo", "kwarg0": "bar"})

    # a Namespace as first argument
    from_namespace = DataModuleWithHparams_1(Namespace(hello="world"), "foo", kwarg0="bar")
    assert from_namespace.hparams == AttributeDict({"hello": "world"})

    # a plain dict as first argument
    from_dict = DataModuleWithHparams_1({"hello": "world"}, "foo", kwarg0="bar")
    assert from_dict.hparams == AttributeDict({"hello": "world"})

    # an OmegaConf container, only when the optional dependency is installed
    if _OMEGACONF_AVAILABLE:
        from_conf = DataModuleWithHparams_1(OmegaConf.create({"hello": "world"}), "foo", kwarg0="bar")
        assert from_conf.hparams == OmegaConf.create({"hello": "world"})
def test_simple_hyperparameters_saving():
    """Check that constructor arguments show up in ``hparams`` under their parameter names."""
    expected = AttributeDict({"arg0": 10, "arg1": "foo", "kwarg0": "bar"})
    module = DataModuleWithHparams(10, "foo", kwarg0="bar")
    assert module.hparams == expected
def _run_optimization(self, batch_idx: int, split_batch: Any, opt_idx: int = 0, optimizer: Optional[torch.optim.Optimizer] = None): """Runs closure (train step + backward) together with optimization if necessary. Args: batch_idx: the index of the current batch split_batch: the current tbptt split of the whole batch opt_idx: the index of the current optimizer optimizer: the current optimizer """ # TODO(@awaelchli): In v1.5, when optimizer_idx gets removed from training_step in manual_optimization, change # opt_idx=0 to opt_idx=None in the signature here # toggle model params self.run_optimization_start(opt_idx, optimizer) result = AttributeDict() closure = self.make_closure(split_batch, batch_idx, opt_idx, optimizer, self._hiddens, result) if self.should_accumulate(): # For gradient accumulation # ------------------- # calculate loss (train step + train step end) # ------------------- # automatic_optimization=True: perform ddp sync only when performing optimizer_step # automatic_optimization=False: don't block synchronization here with self.block_ddp_sync_behaviour(): closure() # ------------------------------ # BACKWARD PASS # ------------------------------ # gradient update with accumulated gradients else: if self.trainer.lightning_module.automatic_optimization: self.optimizer_step(optimizer, opt_idx, batch_idx, closure) if len(self.trainer.optimizers) > 1: # revert back to previous state self.trainer.lightning_module.untoggle_optimizer(opt_idx) else: result = self.training_step(split_batch, batch_idx, opt_idx, self._hiddens) if not result: # user decided to skip optimization return result # update running loss + reset accumulated loss self.update_running_loss(result.loss) self._process_closure_result(result) return result
def hparams(self) -> Union[AttributeDict, MutableMapping]:
    """The collection of hyperparameters saved with :meth:`save_hyperparameters`.

    The returned object is the live ``_hparams`` container, so the user can
    mutate it. For the frozen set of initial hyperparameters, use
    :attr:`hparams_initial`.

    Returns:
        Mutable hyperparameters dictionary; an empty ``AttributeDict`` is
        created and stored on first access if none exists yet.
    """
    try:
        return self._hparams
    except AttributeError:
        # nothing stored yet: lazily create the container
        self._hparams = AttributeDict()
        return self._hparams
def _to_hparams_dict(hp: Union[dict, Namespace, str]):
    """Normalise a hyperparameter container.

    Args:
        hp: a ``dict``, an ``argparse.Namespace`` or another config object.

    Returns:
        An ``AttributeDict`` for ``dict`` / ``Namespace`` input; the object
        itself when it is an instance of ``ALLOWED_CONFIG_TYPES``.

    Raises:
        ValueError: if ``hp`` is an instance of ``PRIMITIVE_TYPES`` or of any
            type not listed in ``ALLOWED_CONFIG_TYPES``.
    """
    # a Namespace is handled through its attribute dictionary
    candidate = vars(hp) if isinstance(hp, Namespace) else hp

    if isinstance(candidate, dict):
        return AttributeDict(candidate)
    if isinstance(candidate, PRIMITIVE_TYPES):
        raise ValueError(f"Primitives {PRIMITIVE_TYPES} are not allowed.")
    if isinstance(candidate, ALLOWED_CONFIG_TYPES):
        return candidate
    raise ValueError(f"Unsupported config type of {type(candidate)}.")
def __init__(self, config):
    """Store the relevant configuration entries and reset lazily-filled state.

    Args:
        config: object exposing, as attributes, every name listed in
            ``self.config_keys``.
    """
    super().__init__()

    # copy only the attributes named in ``config_keys``
    selected = {}
    for key in self.config_keys:
        selected[key] = getattr(config, key)
    self.config = AttributeDict(selected)

    # populated later; nothing is loaded at construction time
    self.transform = None
    self.mnist_splits = None
    self.fmnist_splits = None
def hparams_initial(self) -> AttributeDict:
    """The collection of hyperparameters saved with :meth:`save_hyperparameters`.

    Callers always receive a copy, so edits to the returned object do not
    affect the stored initial values. Manual updates to the saved
    hyperparameters can instead be performed through :attr:`hparams`.

    Returns:
        AttributeDict: a deep copy of ``_hparams_initial``, or an empty
        ``AttributeDict`` when that attribute has not been set.
    """
    if hasattr(self, "_hparams_initial"):
        # prevent any change
        return copy.deepcopy(self._hparams_initial)
    return AttributeDict()
def _training_step(
    self,
    split_batch: Any,
    batch_idx: int,
    opt_idx: int,
    hiddens: Tensor,
) -> Optional[AttributeDict]:
    """Performs the actual train step with the tied hooks.

    Args:
        split_batch: the current tbptt split of the current batch
        batch_idx: the index of the current batch
        opt_idx: the index of the current optimizer
        hiddens: the model's hidden state of the previous iteration

    Returns:
        an AttributeDict containing the loss value and the training step output,
        or ``None`` when the processed training step output is ``None``.
    """
    # NOTE(review): the source was flattened onto one line; the extent of the
    # two profiler scopes below is reconstructed - confirm against upstream.
    trainer = self.trainer

    # give the PL module a result for logging
    module = trainer.lightning_module

    with trainer.profiler.profile("model_forward"):
        step_kwargs = self._build_kwargs(split_batch, batch_idx, opt_idx, hiddens)

        # manually capture logged metrics
        module._current_fx_name = 'training_step'
        with trainer.profiler.profile("training_step"):
            step_output = trainer.accelerator.training_step(step_kwargs)
            trainer.accelerator.post_training_step()

        step_output = trainer.call_hook("training_step_end", step_output)
        self._check_training_step_output(step_output)

        step_output = self._process_training_step_output(step_output)
        if step_output is None:
            return None

    if trainer.lightning_module.automatic_optimization:
        # accumulate loss. if accumulate_grad_batches==1, no effect
        closure_loss = step_output.minimize / trainer.accumulate_grad_batches
        # the loss will get scaled for amp. avoid any modifications to it
        loss = closure_loss.detach().clone()
    else:
        # manual optimization: no loss is tracked here
        closure_loss = None
        loss = None

    return AttributeDict(closure_loss=closure_loss, loss=loss, training_step_output=step_output)
def _run_optimization( self, batch_idx: int, split_batch: Any, opt_idx: Optional[int] = None, optimizer: Optional[torch.optim.Optimizer] = None, ): """Runs closure (train step + backward) together with optimization if necessary. Args: batch_idx: the index of the current batch split_batch: the current tbptt split of the whole batch opt_idx: the index of the current optimizer or `None` in case of manual optimization optimizer: the current optimizer or `None` in case of manual optimization """ # toggle model params self._run_optimization_start(opt_idx, optimizer) result = AttributeDict() closure = self._make_closure(split_batch, batch_idx, opt_idx, optimizer, self._hiddens, result) if self.trainer.fit_loop.should_accumulate(): # For gradient accumulation # ------------------- # calculate loss (train step + train step end) # ------------------- # automatic_optimization=True: perform ddp sync only when performing optimizer_step # automatic_optimization=False: don't block synchronization here with self.block_ddp_sync_behaviour(): closure() # ------------------------------ # BACKWARD PASS # ------------------------------ # gradient update with accumulated gradients else: if self.trainer.lightning_module.automatic_optimization: self._optimizer_step(optimizer, opt_idx, batch_idx, closure) else: result = self._training_step(split_batch, batch_idx, opt_idx, self._hiddens) if result: # if no result, user decided to skip optimization # otherwise update running loss + reset accumulated loss self._update_running_loss(result.loss) self._process_closure_result(result) # untoggle model params self._run_optimization_end(opt_idx) return result
def test_hparams_save_yaml(tmpdir):
    """Round-trip hyperparameters through YAML for every supported container type."""
    hparams = {
        "batch_size": 32,
        "learning_rate": 0.001,
        "data_root": './any/path/here',
        "nasted": {"any_num": 123, "anystr": 'abcd'},
    }
    path_yaml = os.path.join(tmpdir, 'testing-hparams.yaml')

    # each factory wraps the same values in a different container type
    containers = (
        lambda hp: hp,
        lambda hp: Namespace(**hp),
        lambda hp: AttributeDict(hp),
        lambda hp: OmegaConf.create(hp),
    )
    for make_container in containers:
        save_hparams_to_yaml(path_yaml, make_container(hparams))
        assert load_hparams_from_yaml(path_yaml) == hparams
def _training_step(
    self, split_batch: Any, batch_idx: int, opt_idx: int, hiddens: Tensor
) -> Optional[AttributeDict]:
    """Performs the actual train step with the tied hooks.

    Also stores the hidden state returned by ``_process_training_step_output``
    on ``self._hiddens``.

    Args:
        split_batch: the current tbptt split of the current batch
        batch_idx: the index of the current batch
        opt_idx: the index of the current optimizer
        hiddens: the model's hidden state of the previous iteration

    Returns:
        an AttributeDict containing the loss value and the training step output,
        or ``None`` when no result collection was produced.
    """
    # NOTE(review): the source was flattened onto one line; the extent of the
    # two profiler scopes below is reconstructed - confirm against upstream.
    trainer = self.trainer

    # give the PL module a result for logging
    module = trainer.lightning_module

    with trainer.profiler.profile("model_forward"):
        step_kwargs = _build_training_step_kwargs(
            trainer.lightning_module, trainer.optimizers, split_batch, batch_idx, opt_idx, hiddens
        )

        # manually capture logged metrics
        module._current_fx_name = "training_step"
        with trainer.profiler.profile("training_step"):
            step_output = trainer.accelerator.training_step(step_kwargs)
            trainer.accelerator.post_training_step()

        del step_kwargs

        step_output = trainer.call_hook("training_step_end", step_output)
        _check_training_step_output(trainer.lightning_module, step_output)

        result_collection, self._hiddens = _process_training_step_output(trainer, step_output)
        if result_collection is None:
            return None

    # output validation already done, here loss can't be None
    assert result_collection.minimize is not None

    # accumulate loss. if accumulate_grad_batches==1, no effect
    scaled_loss = result_collection.minimize / trainer.accumulate_grad_batches
    # the loss will get scaled for amp. avoid any modifications to it
    detached_loss = scaled_loss.detach().clone()
    return AttributeDict(closure_loss=scaled_loss, loss=detached_loss, result_collection=result_collection)
def _training_step(self, split_batch: Any, batch_idx: int, hiddens: Tensor) -> Optional[AttributeDict]:
    """Performs the training step for manual optimization.

    Also stores the hidden state returned by ``_process_training_step_output``
    on ``self._hiddens``.

    Args:
        split_batch: the current tbptt split of the current batch
        batch_idx: the index of the current batch
        hiddens: the model's hidden state of the previous iteration

    Returns:
        an AttributeDict containing the training step output (``closure_loss``
        and ``loss`` are always ``None`` here), or ``None`` when no result
        collection was produced.
    """
    # NOTE(review): the source was flattened onto one line; the extent of the
    # two profiler scopes below is reconstructed - confirm against upstream.
    trainer = self.trainer

    # give the PL module a result for logging
    module = trainer.lightning_module

    with trainer.profiler.profile("model_forward"):
        # no optimizer index is passed in manual optimization
        step_kwargs = _build_training_step_kwargs(
            module, trainer.optimizers, split_batch, batch_idx, opt_idx=None, hiddens=hiddens
        )

        # manually capture logged metrics
        module._current_fx_name = "training_step"
        with trainer.profiler.profile("training_step"):
            step_output = trainer.accelerator.training_step(step_kwargs)
            trainer.accelerator.post_training_step()

        del step_kwargs

        step_output = trainer.call_hook("training_step_end", step_output)
        _check_training_step_output(trainer.lightning_module, step_output)

        result_collection, self._hiddens = _process_training_step_output(trainer, step_output)
        if result_collection is None:
            return None

    return AttributeDict(closure_loss=None, loss=None, result_collection=result_collection)
def setUp(self) -> None:
    """Build the KITTI data module, the depth model and the loggers used by the tests."""
    # dataset lives in ``<parent of this file's folder>/datasets/kitti``
    here = os.path.dirname(os.path.abspath(__file__))
    kitti_root = os.path.join(os.path.dirname(here), "datasets", "kitti")

    factory = KittiDataModuleFactory(range(0, 301, 1), directory=kitti_root)
    real_data_module = factory.make_dataset_manager(
        final_image_size=(128, 384),
        transform_manager_parameters={"filters": True},
        batch_size=1,
        num_workers=WORKERS_COUNT,
        split=(0.8, 0.1, 0.1),
    )
    self._data_module = DataModuleMock(real_data_module)

    # model and its collaborators
    pose_net = PoseNetResNet()
    depth_net = DepthNetResNet()
    criterion = UnsupervisedCriterion(self._data_module.get_cameras_calibration(), 1, 1)
    visualizer = ResultVisualizer(cameras_calibration=self._data_module.get_cameras_calibration())
    optimizer_params = AttributeDict(lr=1e-3, beta1=0.99, beta2=0.9)
    self._model = UnsupervisedDepthModel(
        optimizer_params, pose_net, depth_net, criterion, result_visualizer=visualizer
    ).cuda()

    # TensorBoard loggers, individually and combined
    self._tb_logger = TensorBoardLogger('logs/')
    self._second_tb_logger = TensorBoardLogger('logs1/')
    self._double_tb_logger = LoggerCollection([self._tb_logger, self._second_tb_logger])

    # NOTE(review): endpoint and credentials are hard-coded and overwrite
    # whatever is already in the process environment - consider moving them
    # to external test configuration.
    os.environ["MLFLOW_S3_ENDPOINT_URL"] = "http://ec2-3-134-104-174.us-east-2.compute.amazonaws.com:9000"
    os.environ["AWS_ACCESS_KEY_ID"] = "depth"
    os.environ["AWS_SECRET_ACCESS_KEY"] = "depth123"
    self._mlflow_logger = MLFlowLogger(
        experiment_name="test",
        tracking_uri="http://ec2-3-134-104-174.us-east-2.compute.amazonaws.com:5001",
    )
def __init__(
    self,
    hparams: LinearClassifierMethodParams = None,
    **kwargs,
):
    """Build the encoder, dataset and linear classification head.

    Args:
        hparams: a params object, a ``dict`` of fields used to build one via
            ``self.params``, or ``None`` to build one from ``kwargs`` alone.
        **kwargs: extra fields forwarded to ``self.params`` when ``hparams``
            is ``None`` or a ``dict``.
    """
    super().__init__()

    if hparams is None:
        hparams = self.params(**kwargs)
    elif isinstance(hparams, dict):
        hparams = self.params(**hparams, **kwargs)

    # Mirror the sibling model constructor in this file: when the base class
    # already exposes ``hparams`` as an ``AttributeDict``, update it in place
    # instead of assigning, so this also works where ``hparams`` cannot be
    # set directly. Otherwise fall back to the original plain assignment.
    hparams_dict = AttributeDict(attr.asdict(hparams))
    existing = getattr(self, "hparams", None)
    if isinstance(existing, AttributeDict):
        existing.update(hparams_dict)
    else:
        self.hparams = hparams_dict

    # actually do a load that is a little more flexible
    self.model = utils.get_encoder(hparams.encoder_arch)

    self.dataset = utils.get_class_dataset(hparams.dataset_name)

    self.classifier = torch.nn.Linear(hparams.embedding_dim, self.dataset.num_classes)
def test_hparams_save_yaml(tmpdir):
    """Round-trip hyperparameters (including an Enum value) through YAML for each container type."""

    class Options(str, Enum):
        option1name = "option1val"
        option2name = "option2val"
        option3name = "option3val"

    hparams = {
        "batch_size": 32,
        "learning_rate": 0.001,
        "data_root": "./any/path/here",
        "nested": {"any_num": 123, "anystr": "abcd"},
        "switch": Options.option3name,
    }
    path_yaml = os.path.join(tmpdir, "testing-hparams.yaml")

    def _compare_params(loaded_params, default_params: dict):
        """Assert that the loaded params match the defaults; Enum members are compared by name."""
        assert isinstance(loaded_params, (dict, DictConfig))
        assert loaded_params.keys() == default_params.keys()
        for key, value in default_params.items():
            expected = value.name if isinstance(value, Enum) else value
            assert expected == loaded_params[key]

    # containers that are loaded back without OmegaConf
    plain_containers = (
        lambda hp: hp,
        lambda hp: Namespace(**hp),
        lambda hp: AttributeDict(hp),
    )
    for make_container in plain_containers:
        save_hparams_to_yaml(path_yaml, make_container(hparams))
        _compare_params(load_hparams_from_yaml(path_yaml, use_omegaconf=False), hparams)

    if _OMEGACONF_AVAILABLE:
        save_hparams_to_yaml(path_yaml, OmegaConf.create(hparams))
        _compare_params(load_hparams_from_yaml(path_yaml), hparams)
def test_hparams_pickle(tmpdir):
    """An ``AttributeDict`` must survive a dump with pickle and with cloudpickle."""
    ad = AttributeDict({'key1': 1, 'key2': 'abc'})
    # both serializers produce payloads that plain ``pickle.loads`` restores
    for serializer in (pickle, cloudpickle):
        payload = serializer.dumps(ad)
        assert ad == pickle.loads(payload)
def hparams(self) -> Union[AttributeDict, dict, Namespace]:
    """Return the stored hyperparameters, creating an empty ``AttributeDict`` on first access."""
    if hasattr(self, "_hparams"):
        return self._hparams
    # nothing stored yet: lazily create the container
    self._hparams = AttributeDict()
    return self._hparams
def hparams_initial(self) -> AttributeDict:
    """Return a deep copy of ``_hparams_initial``, or an empty ``AttributeDict`` if it is unset."""
    try:
        initial = self._hparams_initial
    except AttributeError:
        return AttributeDict()
    # prevent any change
    return copy.deepcopy(initial)
def __init__(
    self,
    hparams: Union[ModelParams, dict, None] = None,
    **kwargs,
):
    """Build the encoder, projection / prediction heads and optional lagging copies and queue.

    Args:
        hparams: a params object, a ``dict`` of fields used to build one via
            ``self.params``, or ``None`` to build one from ``kwargs`` alone.
        **kwargs: extra fields forwarded to ``self.params`` when ``hparams``
            is ``None`` or a ``dict``.
    """
    super().__init__()

    def _frozen_copy(module):
        """Deep-copy ``module`` and disable gradients on every parameter of the copy."""
        clone = copy.deepcopy(module)
        for param in clone.parameters():
            param.requires_grad = False
        return clone

    if hparams is None:
        hparams = self.params(**kwargs)
    elif isinstance(hparams, dict):
        hparams = self.params(**hparams, **kwargs)

    # update in place when ``hparams`` already is an AttributeDict, else assign
    hparams_as_dict = AttributeDict(attr.asdict(hparams))
    if isinstance(self.hparams, AttributeDict):
        self.hparams.update(hparams_as_dict)
    else:
        self.hparams = hparams_as_dict

    # Check for configuration issues
    if hparams.gather_keys_for_queue and not (
        hparams.shuffle_batch_norm or hparams.encoder_arch.startswith("ws_")
    ):
        warnings.warn(
            "Configuration suspicious: gather_keys_for_queue without shuffle_batch_norm or weight standardization"
        )

    has_negatives = hparams.use_negative_examples_from_batch or hparams.use_negative_examples_from_queue
    if hparams.loss_type == "ce" and not has_negatives:
        warnings.warn("Configuration suspicious: cross entropy loss without negative examples")

    # Create encoder model
    self.model = utils.get_encoder(hparams.encoder_arch, hparams.dataset_name)

    # Create dataset
    self.dataset = utils.get_moco_dataset(hparams)

    # "key" function (no grad)
    self.lagging_model = _frozen_copy(self.model) if hparams.use_lagging_model else None

    self.projection_model = utils.MLP(
        hparams.embedding_dim,
        hparams.dim,
        hparams.mlp_hidden_dim,
        num_layers=hparams.projection_mlp_layers,
        normalization=get_mlp_normalization(hparams),
        weight_standardization=hparams.use_mlp_weight_standardization,
    )

    self.prediction_model = utils.MLP(
        hparams.dim,
        hparams.dim,
        hparams.mlp_hidden_dim,
        num_layers=hparams.prediction_mlp_layers,
        normalization=get_mlp_normalization(hparams, prediction=True),
        weight_standardization=hparams.use_mlp_weight_standardization,
    )

    # "key" function (no grad)
    self.lagging_projection_model = (
        _frozen_copy(self.projection_model) if hparams.use_lagging_model else None
    )

    # this classifier is used to compute representation quality each epoch
    self.sklearn_classifier = LogisticRegression(max_iter=100, solver="liblinear")

    if not hparams.use_negative_examples_from_queue:
        self.queue = None
    else:
        # create the queue
        self.register_buffer("queue", torch.randn(hparams.dim, hparams.K))
        self.queue = torch.nn.functional.normalize(self.queue, dim=0)

        self.register_buffer("queue_ptr", torch.zeros(1, dtype=torch.long))
def test_hparams_pickle(tmpdir):
    """An ``AttributeDict`` must compare equal to itself after a pickle / cloudpickle round trip."""
    ad = AttributeDict({"key1": 1, "key2": "abc"})

    restored_from_pickle = pickle.loads(pickle.dumps(ad))
    assert ad == restored_from_pickle

    # cloudpickle payloads are restored with plain pickle
    restored_from_cloudpickle = pickle.loads(cloudpickle.dumps(ad))
    assert ad == restored_from_cloudpickle