def get_config_from_folder_or_ckpt(
    folder: str, ckpt: Dict[str, Any] = None
) -> Dict[str, Any]:
    r"""Get the config from a folder, falling back to a checkpoint.

    Args:
        folder (str): folder searched first for a single ``*.yaml`` config.
        ckpt (Optional[Dict[str, Any]]): optional checkpoint whose ``"config"``
            entry is used when ``folder`` contains no yaml file.

    Returns:
        config (Dict[str, Any]): config object

    Raises:
        AssertionError: if ``folder`` contains more than one yaml file, or if it
            contains none and ``ckpt`` is missing or has no ``"config"`` key.
    """
    configs = glob.glob(os.path.join(folder, "*.yaml"))

    if configs:
        assert len(configs) <= 1, (
            "Multiple yaml files with the pretrained model. "
            + "MMF doesn't know what to do."
        )
        return load_yaml(configs[0])

    # ``ckpt`` defaults to None; check that explicitly so the caller gets the
    # descriptive assertion message instead of a TypeError from ``in None``.
    assert ckpt is not None and "config" in ckpt, (
        "No configs provided with pretrained model"
        " while checkpoint also doesn't have configuration."
    )
    return ckpt["config"]
def setUp(self):
    """Build a namespace-backed trainer stub wired to an LRSchedulerCallback."""
    self.trainer = argparse.Namespace()

    defaults = load_yaml(os.path.join("configs", "defaults.yaml"))
    training_overrides = {
        "lr_scheduler": True,
        "lr_ratio": 0.1,
        "lr_steps": [1, 2],
        "use_warmup": False,
    }
    self.config = OmegaConf.merge(
        defaults,
        {"model": "simple", "model_config": {}, "training": training_overrides},
    )

    # The trainer works on its own deep copy; self.config stays as the
    # reference copy for the tests.
    trainer_config = deepcopy(self.config)
    self.trainer.config = trainer_config
    registry.register("config", trainer_config)

    self.trainer.model = SimpleModule()
    self.trainer.val_loader = torch.utils.data.DataLoader(
        NumbersDataset(), batch_size=self.config.training.batch_size
    )
    model_parameters = self.trainer.model.parameters()
    self.trainer.optimizer = torch.optim.Adam(model_parameters, lr=1e-01)
    self.trainer.lr_scheduler_callback = LRSchedulerCallback(
        self.config, self.trainer
    )
def _create_checkpoint_file(self, path):
    """Save a checkpoint to ``path`` with the model folder's config embedded.

    Reads ``model.pth`` and ``config.yaml`` from the folder returned by
    ``self._get_model_folder()``, stores the loaded config under the
    checkpoint's ``"config"`` key and writes the result with ``torch.save``.
    """
    folder = self._get_model_folder()
    weights_path = os.path.join(folder, "model.pth")
    yaml_path = os.path.join(folder, "config.yaml")

    loaded_config = load_yaml(yaml_path)
    with PathManager.open(weights_path, "rb") as handle:
        checkpoint = torch.load(handle)

    checkpoint["config"] = loaded_config
    torch.save(checkpoint, path)
def get_trainer_config():
    """Return ``configs/defaults.yaml`` merged with Lightning trainer overrides.

    The override values are merged on top of the loaded defaults with
    ``OmegaConf.merge`` and the merged config is returned.
    """
    base = load_yaml(os.path.join("configs", "defaults.yaml"))

    training = {
        "trainer": "lightning",
        "detect_anomaly": False,
        "evaluation_interval": 4,
        "log_interval": 2,
        "update_frequency": 1,
        "fp16": False,
        "batch_size": 1,
        "lr_scheduler": False,
        "tensorboard": False,
    }
    evaluation = {"use_cpu": False, "metrics": []}
    optimizer = {"type": "adam_w", "params": {"lr": 5e-5, "eps": 1e-8}}
    scheduler = {
        "type": "warmup_linear",
        "params": {"num_warmup_steps": 8, "num_training_steps": 8},
    }
    lightning_params = {
        # One GPU when CUDA is available, otherwise none.
        "gpus": 1 if torch.cuda.is_available() else 0,
        "num_nodes": 1,
        "checkpoint_callback": False,
        "deterministic": True,
        "benchmark": False,
        "gradient_clip_val": 0.0,
        "val_check_interval": 4,
        "log_every_n_steps": 2,
        "progress_bar_refresh_rate": 0,
        "accumulate_grad_batches": 1,
        "precision": 32,
        "num_sanity_val_steps": 0,
        "limit_val_batches": 1.0,
        "logger": False,
    }

    overrides = {
        "distributed": {},
        "run_type": "train_val",
        "training": training,
        "evaluation": evaluation,
        "optimizer": optimizer,
        "scheduler": scheduler,
        "trainer": {"type": "lightning", "params": lightning_params},
    }
    return OmegaConf.merge(base, overrides)
def setUp(self):
    """Build a seeded trainer stub with checkpoint and scheduler settings."""
    import argparse

    torch.manual_seed(1234)

    # A Namespace is an easy stand-in for an attribute-style trainer object.
    self.trainer = argparse.Namespace()

    checkpoint_overrides = {
        "save_git_details": False,
        "reset": {
            "optimizer": False,
            "counts": False,
            "all": False,
            "fp16_scaler": False,
        },
        "pretrained_state_mapping": {"base_test": "base"},
        "max_to_keep": 5,
    }
    training_overrides = {
        "checkpoint_interval": 1,
        "early_stop": {"criteria": "val/total_loss", "minimize": True},
        "lr_scheduler": True,
    }
    scheduler_overrides = {
        "type": "multi_step",
        "params": {
            "use_warmup": False,
            "lr_steps": [10, 20],
            "lr_ratio": 0.1,
            "warmup_factor": 1.0,
        },
    }

    defaults = load_yaml(os.path.join("configs", "defaults.yaml"))
    self.config = OmegaConf.merge(
        defaults,
        {
            "model": "simple",
            "model_config": {},
            "checkpoint": checkpoint_overrides,
            "config_override": None,
            "training": training_overrides,
            "scheduler": scheduler_overrides,
        },
    )

    # The trainer works on its own deep copy; self.config stays as the
    # reference copy for the tests.
    self.trainer.config = deepcopy(self.config)

    self.trainer.model = SimpleModule()
    self.trainer.scaler = torch.cuda.amp.GradScaler()

    model_parameters = self.trainer.model.parameters()
    self.trainer.optimizer = torch.optim.Adam(model_parameters, lr=1e-01)
    self.trainer.lr_scheduler_callback = LRSchedulerCallback(
        self.config, self.trainer
    )
def is_zoo_path(self, path) -> bool:
    """Tell whether ``path`` selects an entry in the model zoo config.

    Returns:
        bool: True when ``OmegaConf.select`` yields a non-None value for
        ``path``; False when it yields None or raises an OmegaConf error.
    """
    from mmf.utils.configuration import get_mmf_env, load_yaml

    zoo = load_yaml(get_mmf_env(key="model_zoo"))
    # Lock the zoo config (struct + read-only) before querying it.
    OmegaConf.set_struct(zoo, True)
    OmegaConf.set_readonly(zoo, True)

    try:
        selected = OmegaConf.select(zoo, path)
    except omegaconf.errors.OmegaConfBaseException:
        return False
    return selected is not None
def _create_checkpoint_file(self, path):
    """Save a checkpoint to ``path`` with the model's yaml config embedded.

    The model folder is ``<home>/<data_dir>/models/mmbt.hateful_memes.images``,
    where ``data_dir`` comes from ``get_mmf_env``. ``model.pth`` is loaded from
    it, ``config.yaml`` is stored under the checkpoint's ``"config"`` key, and
    the result is written with ``torch.save``.
    """
    folder = os.path.join(
        str(Path.home()),
        get_mmf_env(key="data_dir"),
        "models",
        "mmbt.hateful_memes.images",
    )
    weights_path = os.path.join(folder, "model.pth")
    yaml_path = os.path.join(folder, "config.yaml")

    loaded_config = load_yaml(yaml_path)
    with PathManager.open(weights_path, "rb") as handle:
        checkpoint = torch.load(handle)

    checkpoint["config"] = loaded_config
    torch.save(checkpoint, path)
def _load_pretrained_model(model_name_or_path, *args, **kwargs):
    """Load a pretrained model's checkpoint and config.

    Args:
        model_name_or_path: an existing path, or a name that is handed to
            ``download_pretrained_model`` together with ``*args``/``**kwargs``.

    Returns:
        dict: ``"config"`` (the model's config), ``"checkpoint"`` (the loaded
        state) and ``"full_config"`` (the whole config that was found).

    Raises:
        AssertionError: on multiple yaml files, on zero or multiple checkpoint
            files, when no config can be found, or when a path-based load
            finds more than one model in the config.
    """
    model_name = model_name_or_path
    if PathManager.exists(model_name_or_path):
        download_path = model_name_or_path
    else:
        download_path = download_pretrained_model(model_name_or_path, *args, **kwargs)

    yaml_files = glob.glob(os.path.join(download_path, "*.yaml"))
    assert len(yaml_files) <= 1, (
        "Multiple yaml files with the pretrained model. "
        + "MMF doesn't know what to do."
    )

    # Gather every file in the folder that has an allowed checkpoint extension.
    ckpt_files = []
    for ext in ALLOWED_CHECKPOINT_EXTS:
        pattern = os.path.join(download_path, f"*{ext}")
        ckpt_files.extend(glob.glob(pattern))
    assert (
        len(ckpt_files) == 1
    ), "None or multiple checkpoints files. MMF doesn't know what to do."

    _hack_imports()

    with PathManager.open(ckpt_files[0], "rb") as handle:
        ckpt = torch.load(handle, map_location=lambda storage, loc: storage)

    # Prefer the yaml file; without one the checkpoint must carry the config.
    if yaml_files:
        config = load_yaml(yaml_files[0])
    else:
        assert "config" in ckpt, (
            "No configs provided with pretrained model"
            " while checkpoint also doesn't have configuration."
        )
        config = ckpt["config"]

    model_config = config.get("model_config", config)
    ckpt = ckpt.get("model", ckpt)

    if PathManager.exists(model_name):
        # Loaded from a path: the config is expected to describe one model.
        assert len(model_config.keys()) == 1, "Checkpoint contains more than one model?"
        sole_key = list(model_config.keys())[0]
        model_config = model_config[sole_key]
    else:
        # Loaded by name: the lookup key is the last path component, cut at
        # its first dot.
        last_component = model_name.rsplit(os.path.sep, 1)[-1]
        model_key = last_component.partition(".")[0]
        model_config = model_config.get(model_key)

    return {"config": model_config, "checkpoint": ckpt, "full_config": config}
def download_pretrained_model(model_name, *args, **kwargs):
    """Download a model zoo entry (and its requirements) and return its folder.

    Args:
        model_name: key selected from the model zoo config.
        *args, **kwargs: forwarded unchanged to the recursive calls made for
            each ``zoo_requirements`` entry.

    Returns:
        The download folder under ``<data_dir>/models``; it carries a
        ``.defaults`` suffix when the entry's ``defaults`` variant was used.

    Raises:
        omegaconf.errors.OmegaConfBaseException: re-raised after printing a
            message when the zoo lookup fails, or when the entry lacks
            ``version``/``resources`` and has no ``defaults``.
    """
    import omegaconf
    from omegaconf import OmegaConf

    from mmf.utils.configuration import load_yaml, get_mmf_env

    zoo = load_yaml(get_mmf_env(key="model_zoo"))
    # Lock the zoo config (struct + read-only) before querying it.
    OmegaConf.set_struct(zoo, True)
    OmegaConf.set_readonly(zoo, True)

    models_root = os.path.join(get_absolute_path(get_mmf_env("data_dir")), "models")
    download_path = os.path.join(models_root, model_name)

    try:
        entry = OmegaConf.select(zoo, model_name)
    except omegaconf.errors.OmegaConfBaseException as err:
        print(f"No such model name {model_name} defined in mmf zoo")
        raise err

    has_version_and_resources = "version" in entry and "resources" in entry
    if not has_version_and_resources:
        # Version and resources are not both present; try the defaults variant.
        try:
            entry = entry.defaults
        except omegaconf.errors.OmegaConfBaseException as err:
            print(
                f"Model name {model_name} doesn't specify 'resources' and 'version' "
                "while no defaults have been provided"
            )
            raise err
        else:
            download_path = os.path.join(models_root, model_name + ".defaults")

    # "zoo_requirements" may be a single string or a list of model names.
    if "zoo_requirements" in entry:
        required = entry.zoo_requirements
        required = [required] if isinstance(required, str) else required
        for dependency in required:
            download_pretrained_model(dependency, *args, **kwargs)

    version = entry.version
    resources = entry.resources

    # Only the master process downloads; synchronize() is then called by all.
    if is_master():
        download_resources(resources, download_path, version)
    synchronize()

    return download_path
def load_pretrained_model(model_name_or_path, *args, **kwargs):
    """Load a pretrained model's checkpoint and config.

    Args:
        model_name_or_path: an existing checkpoint file or model folder, or a
            name that is handed to ``download_pretrained_model`` together with
            ``*args``/``**kwargs``.

    Returns:
        dict: ``"config"`` (the entry of the model config keyed by the last
        path component of the name, cut at its first dot), ``"checkpoint"``
        (the loaded state) and ``"full_config"`` (the whole config found).

    Raises:
        AssertionError: on multiple yaml files, on zero or multiple checkpoint
            files, or when neither a yaml file nor the checkpoint provides a
            config.
    """
    # If this is a file, then load this directly else download and load
    if PathManager.exists(model_name_or_path):
        download_path = model_name_or_path
    else:
        download_path = download_pretrained_model(model_name_or_path, *args, **kwargs)
    model_name = model_name_or_path

    # A checkpoint file has its yaml next to it, while a model folder (e.g. a
    # downloaded one) holds the yaml inside it. Taking dirname() of a folder
    # would search the parent directory instead.
    if os.path.isdir(download_path):
        config_folder_path = download_path
    else:
        config_folder_path = os.path.dirname(download_path)

    configs = glob.glob(os.path.join(config_folder_path, "*.yaml"))
    assert len(configs) <= 1, (
        "Multiple yaml files with the pretrained model. "
        + "MMF doesn't know what to do."
    )

    ckpts = []
    for ext in (".ckpt", ".pth", ".pt"):
        if download_path.endswith(ext):
            # download_path is itself a checkpoint file with this extension.
            ckpts.extend(glob.glob(download_path))
        else:
            ckpts.extend(glob.glob(os.path.join(download_path, "*" + ext)))

    assert (
        len(ckpts) == 1
    ), "None or multiple checkpoints files. MMF doesn't know what to do."

    _hack_imports()

    ckpt = torch.load(ckpts[0], map_location=lambda storage, loc: storage)

    # If configs are not present, the checkpoint has to provide the config.
    if len(configs) == 0:
        assert "config" in ckpt, (
            "No configs provided with pretrained model "
            " while checkpoint also doesn't have configuration."
        )
        config = ckpt["config"]
    else:
        config = load_yaml(configs[0])

    model_config = config.get("model_config", config)
    ckpt = ckpt.get("model", ckpt)

    # Also handles the case where model_name is a path: the lookup key is the
    # last path component, cut at its first dot.
    model_key = model_name.split(os.path.sep)[-1].split(".")[0]
    model_config = model_config.get(model_key)

    return {"config": model_config, "checkpoint": ckpt, "full_config": config}
def setUp(self):
    """Build a trainer stub, a mocked report and a LogisticsCallback."""
    self.tmpdir = tempfile.mkdtemp()
    self.trainer = argparse.Namespace()

    defaults = load_yaml(os.path.join("configs", "defaults.yaml"))
    training_overrides = {
        "checkpoint_interval": 1,
        "evaluation_interval": 10,
        "early_stop": {"criteria": "val/total_loss"},
        "batch_size": 16,
        "log_interval": 10,
        "logger_level": "info",
    }
    self.config = OmegaConf.merge(
        defaults,
        {
            "model": "simple",
            "model_config": {},
            "training": training_overrides,
            # Point the save directory at the temporary folder created above.
            "env": {"save_dir": self.tmpdir},
        },
    )

    # The trainer works on its own deep copy; self.config stays as the
    # reference copy for the tests.
    trainer_config = deepcopy(self.config)
    self.trainer.config = trainer_config
    registry.register("config", trainer_config)
    setup_logger()

    self.report = Mock(spec=Report)
    self.report.dataset_name = "abcd"
    self.report.dataset_type = "test"

    self.trainer.model = SimpleModule()
    self.trainer.val_loader = torch.utils.data.DataLoader(
        NumbersDataset(), batch_size=self.config.training.batch_size
    )
    model_parameters = self.trainer.model.parameters()
    self.trainer.optimizer = torch.optim.Adam(model_parameters, lr=1e-01)
    self.trainer.device = "cpu"

    # All progress counters start at zero.
    for counter in (
        "num_updates",
        "current_iteration",
        "current_epoch",
        "max_updates",
    ):
        setattr(self.trainer, counter, 0)

    self.trainer.meter = Meter()
    self.cb = LogisticsCallback(self.config, self.trainer)
def setUp(self):
    """Build a trainer stub whose callbacks come from ``training.callbacks``."""
    self.trainer = argparse.Namespace()

    defaults = load_yaml(os.path.join("configs", "defaults.yaml"))
    training_overrides = {
        "lr_scheduler": True,
        "lr_ratio": 0.1,
        "lr_steps": [1, 2],
        "use_warmup": False,
        "callbacks": [{"type": "test_callback", "params": {}}],
    }
    self.config = OmegaConf.merge(
        defaults,
        {"model": "simple", "model_config": {}, "training": training_overrides},
    )

    # The trainer works on its own deep copy; self.config stays as the
    # reference copy for the tests.
    trainer_config = deepcopy(self.config)
    self.trainer.config = trainer_config
    registry.register("config", trainer_config)

    simple_model = SimpleModel(SimpleModel.Config())
    simple_model.build()
    self.trainer.model = simple_model

    self.trainer.val_loader = torch.utils.data.DataLoader(
        NumbersDataset(2), batch_size=self.config.training.batch_size
    )
    model_parameters = self.trainer.model.parameters()
    self.trainer.optimizer = torch.optim.Adam(model_parameters, lr=1e-01)
    self.trainer.lr_scheduler_callback = LRSchedulerCallback(
        self.config, self.trainer
    )

    # The list is attached before the loop so it already exists on the
    # trainer while each callback is being constructed.
    self.trainer.callbacks = []
    for spec in self.config.training.get("callbacks", []):
        spec_type = spec.type
        spec_params = spec.params
        callback_cls = registry.get_callback_class(spec_type)
        instance = callback_cls(self.trainer.config, self.trainer, **spec_params)
        self.trainer.callbacks.append(instance)
def test_caption_bleu4(self):
    # Checks CaptionBleu4Metric on a fully matching and a half matching
    # prediction, using a caption processor built from the COCO defaults.
    this_file = os.path.abspath(__file__)

    defaults_path = os.path.join(
        this_file, "../../../mmf/configs/datasets/coco/defaults.yaml"
    )
    config = load_yaml(os.path.abspath(defaults_path))
    processor_config = config.dataset_config.coco.processors.caption_processor

    vocab_file = os.path.join(this_file, "..", "..", "data", "vocab.txt")
    processor_config.params.vocab.type = "random"
    processor_config.params.vocab.vocab_file = os.path.abspath(vocab_file)

    processor = CaptionProcessor(processor_config.params)
    registry.register("coco_caption_processor", processor)

    metric = metrics.CaptionBleu4Metric()
    expected = Sample()
    predicted = {}

    # Complete match: every predicted position scores index 4, and every
    # expected answer entry is 4.
    expected.answers = torch.empty((5, 5, 10)).fill_(4)
    full_scores = torch.zeros((5, 10, 19))
    full_scores[:, :, 4] = 1.0
    predicted["scores"] = full_scores
    full_value = metric.calculate(expected, predicted).item()
    self.assertEqual(full_value, 1.0)

    # Partial match: positions 0-4 score index 4, the rest score index 18.
    expected.answers = torch.empty((5, 5, 10)).fill_(4)
    partial_scores = torch.zeros((5, 10, 19))
    partial_scores[:, 0:5, 4] = 1.0
    partial_scores[:, 5:, 18] = 1.0
    predicted["scores"] = partial_scores
    partial_value = metric.calculate(expected, predicted).item()
    self.assertAlmostEqual(partial_value, 0.3928, 4)
def test_save_config(self):
    # Constructing a Checkpoint is expected to leave a config.yaml in the
    # temporary directory that equals both reference configs.
    with mock_env_with_temp() as tmp_dir:
        Checkpoint(self.trainer)
        saved_path = os.path.join(tmp_dir, "config.yaml")
        saved_config = load_yaml(saved_path)
        self.assertTrue(saved_config == self.config)
        self.assertTrue(saved_config == self.trainer.config)
def _test_zoo_for_keys(self, path):
    """Load the yaml at ``path`` and pass it to ``self._recurse_on_config``."""
    self._recurse_on_config(load_yaml(path))
def _test_zoo(self, path: str, callback: typing.Callable):
    """Load the yaml at ``path`` and recurse over it with ``callback``.

    Args:
        path: location of the zoo yaml file handed to ``load_yaml``.
        callback: forwarded to ``self._recurse_on_config`` as ``callback``.
    """
    loaded = load_yaml(path)
    self._recurse_on_config(loaded, callback=callback)
def _get_config(self, path):
    """Load the yaml found at ``path`` joined onto this file's absolute path.

    The joined path is normalized with ``os.path.abspath`` before loading, so
    ``..`` components in ``path`` are collapsed.
    """
    joined = os.path.join(os.path.abspath(__file__), path)
    return load_yaml(os.path.abspath(joined))
def __init__(
    self,
    num_train_data,
    max_updates,
    max_epochs,
    config=None,
    optimizer=None,
    update_frequency=1,
    batch_size=1,
    batch_size_per_device=None,
    fp16=False,
    on_update_end_fn=None,
    scheduler_config=None,
    grad_clipping_config=None,
    tensorboard=False,
):
    """Set up a mocked training loop around a ``SimpleModel``.

    Args:
        num_train_data: stored as ``self.num_data``.
        max_updates: written to the training config unless None.
        max_epochs: written to the training config unless None.
        config: config to use; when None, ``configs/defaults.yaml`` is loaded
            and merged with training overrides built from the arguments.
        optimizer: optimizer to use; when None a ``MagicMock`` with mocked
            ``step`` and ``zero_grad`` is installed.
        update_frequency, batch_size, fp16, tensorboard: written into the
            training config.
        batch_size_per_device: written into the training config only when
            ``config`` is None.
        on_update_end_fn: used as ``self.on_update_end``; when omitted, the
            scheduler callback's ``on_update_end`` is used if a scheduler is
            configured, else a ``MagicMock``.
        scheduler_config: when truthy, enables the LR scheduler and registers
            an ``LRSchedulerCallback``.
        grad_clipping_config: when truthy, enables gradient clipping using its
            ``"max_grad_l2_norm"`` and ``"clip_norm_mode"`` entries.
    """
    if config is None:
        self.config = load_yaml("configs/defaults.yaml")
        self.config = OmegaConf.merge(
            self.config,
            {
                "training": {
                    "detect_anomaly": False,
                    "evaluation_interval": 10000,
                    "update_frequency": update_frequency,
                    "fp16": fp16,
                    "batch_size": batch_size,
                    "batch_size_per_device": batch_size_per_device,
                    "tensorboard": tensorboard,
                    "run_type": "train",
                    "num_workers": 0,
                },
                "datasets": "",
                "model": "",
            },
        )
        self.training_config = self.config.training
    else:
        config.training.batch_size = batch_size
        config.training.fp16 = fp16
        config.training.update_frequency = update_frequency
        config.training.tensorboard = tensorboard
        self.training_config = config.training
        self.config = config

    registry.register("config", self.config)

    if max_updates is not None:
        self.training_config["max_updates"] = max_updates
    if max_epochs is not None:
        self.training_config["max_epochs"] = max_epochs

    self.model = SimpleModel({"in_dim": 1})
    self.model.build()
    if torch.cuda.is_available():
        self.model = self.model.cuda()
        self.device = "cuda"
    else:
        self.device = "cpu"
    self.distributed = False

    if optimizer is None:
        self.optimizer = MagicMock()
        self.optimizer.step = MagicMock(return_value=None)
        self.optimizer.zero_grad = MagicMock(return_value=None)
    else:
        self.optimizer = optimizer

    if scheduler_config:
        # Go through self.config rather than the ``config`` argument: the
        # argument is None when the defaults were loaded above, while
        # self.config is the same object whenever a config was passed in.
        self.config.training.lr_scheduler = True
        self.config.scheduler = scheduler_config
        self.lr_scheduler_callback = LRSchedulerCallback(self.config, self)
        # NOTE(review): self.callbacks is not created in this method; it is
        # presumably provided by the class or a base class - confirm.
        self.callbacks.append(self.lr_scheduler_callback)
        on_update_end_fn = (
            on_update_end_fn
            if on_update_end_fn
            else self.lr_scheduler_callback.on_update_end
        )

    if grad_clipping_config:
        self.training_config.clip_gradients = True
        self.training_config.max_grad_l2_norm = grad_clipping_config[
            "max_grad_l2_norm"
        ]
        self.training_config.clip_norm_mode = grad_clipping_config["clip_norm_mode"]

    self.on_batch_start = MagicMock(return_value=None)
    self.on_update_start = MagicMock(return_value=None)
    self.logistics_callback = MagicMock(return_value=None)
    self.logistics_callback.log_interval = MagicMock(return_value=None)
    self.on_batch_end = MagicMock(return_value=None)
    self.on_update_end = (
        on_update_end_fn if on_update_end_fn else MagicMock(return_value=None)
    )
    self.on_validation_start = MagicMock(return_value=None)
    # The gradient scaler is created disabled.
    self.scaler = torch.cuda.amp.GradScaler(enabled=False)
    self.early_stop_callback = MagicMock(return_value=None)
    self.on_validation_end = MagicMock(return_value=None)
    self.metrics = MagicMock(return_value={})

    self.num_data = num_train_data
    self.run_type = self.config.get("run_type", "train")