Ejemplo n.º 1
0
def get_config_from_folder_or_ckpt(
    folder: str, ckpt: Dict[str, Any] = None
) -> Dict[str, Any]:
    r"""Get the config from a folder or, failing that, from a checkpoint.

    The folder is searched for ``*.yaml`` files first. If exactly one is
    found it is loaded with ``load_yaml``; if none is found, the ``"config"``
    entry of ``ckpt`` is returned instead.

    Args:
        folder (str): folder from which config will be searched first
        ckpt (Optional[Dict[str, Any]]): optional checkpoint from which config
            might be found.

    Returns:
        config (Dict[str, Any]): config object

    Raises:
        AssertionError: if the folder holds more than one yaml file, or if it
            holds none and ``ckpt`` is missing or has no ``"config"`` key.
    """
    configs = glob.glob(os.path.join(folder, "*.yaml"))

    if configs:
        assert len(configs) <= 1, (
            "Multiple yaml files with the pretrained model. "
            + "MMF doesn't know what to do."
        )
        return load_yaml(configs[0])

    # ``ckpt`` defaults to None; check for that explicitly so the caller gets
    # the intended message rather than a TypeError from ``"config" in None``.
    assert ckpt is not None and "config" in ckpt, (
        "No configs provided with pretrained model"
        " while checkpoint also doesn't have configuration."
    )
    return ckpt["config"]
Ejemplo n.º 2
0
    def setUp(self):
        """Prepare a namespace-based stand-in trainer for the tests.

        Merges test overrides onto ``configs/defaults.yaml``, registers a deep
        copy of the result under ``"config"``, and attaches a model, a
        validation loader, an Adam optimizer and an ``LRSchedulerCallback`` to
        ``self.trainer``.
        """
        self.trainer = argparse.Namespace()

        training_overrides = {
            "lr_scheduler": True,
            "lr_ratio": 0.1,
            "lr_steps": [1, 2],
            "use_warmup": False,
        }
        defaults = load_yaml(os.path.join("configs", "defaults.yaml"))
        self.config = OmegaConf.merge(
            defaults,
            {
                "model": "simple",
                "model_config": {},
                "training": training_overrides,
            },
        )

        # The trainer gets its own deep copy; self.config stays as the
        # original for comparison in tests.
        trainer = self.trainer
        trainer.config = deepcopy(self.config)
        registry.register("config", trainer.config)

        trainer.model = SimpleModule()
        trainer.val_loader = torch.utils.data.DataLoader(
            NumbersDataset(), batch_size=self.config.training.batch_size
        )
        trainer.optimizer = torch.optim.Adam(trainer.model.parameters(), lr=1e-01)
        trainer.lr_scheduler_callback = LRSchedulerCallback(self.config, trainer)
Ejemplo n.º 3
0
 def _create_checkpoint_file(self, path):
     """Save a checkpoint at ``path`` that carries its config inside it.

     Reads ``config.yaml`` and ``model.pth`` from the folder returned by
     ``self._get_model_folder()``, stores the loaded config under the
     ``"config"`` key of the checkpoint, and writes the result to ``path``.
     """
     folder = self._get_model_folder()
     weights_path = os.path.join(folder, "model.pth")
     embedded_config = load_yaml(os.path.join(folder, "config.yaml"))

     with PathManager.open(weights_path, "rb") as stream:
         checkpoint = torch.load(stream)

     checkpoint["config"] = embedded_config
     torch.save(checkpoint, path)
Ejemplo n.º 4
0
def get_trainer_config():
    """Return ``configs/defaults.yaml`` merged with Lightning trainer overrides.

    The overrides are grouped by top-level section below; the ``gpus`` value
    is 1 when CUDA is available and 0 otherwise.
    """
    base = load_yaml(os.path.join("configs", "defaults.yaml"))

    training = {
        "trainer": "lightning",
        "detect_anomaly": False,
        "evaluation_interval": 4,
        "log_interval": 2,
        "update_frequency": 1,
        "fp16": False,
        "batch_size": 1,
        "lr_scheduler": False,
        "tensorboard": False,
    }
    evaluation = {"use_cpu": False, "metrics": []}
    optimizer = {"type": "adam_w", "params": {"lr": 5e-5, "eps": 1e-8}}
    scheduler = {
        "type": "warmup_linear",
        "params": {"num_warmup_steps": 8, "num_training_steps": 8},
    }
    lightning_params = {
        "gpus": 1 if torch.cuda.is_available() else 0,
        "num_nodes": 1,
        "checkpoint_callback": False,
        "deterministic": True,
        "benchmark": False,
        "gradient_clip_val": 0.0,
        "val_check_interval": 4,
        "log_every_n_steps": 2,
        "progress_bar_refresh_rate": 0,
        "accumulate_grad_batches": 1,
        "precision": 32,
        "num_sanity_val_steps": 0,
        "limit_val_batches": 1.0,
        "logger": False,
    }

    overrides = {
        "distributed": {},
        "run_type": "train_val",
        "training": training,
        "evaluation": evaluation,
        "optimizer": optimizer,
        "scheduler": scheduler,
        "trainer": {"type": "lightning", "params": lightning_params},
    }
    return OmegaConf.merge(base, overrides)
Ejemplo n.º 5
0
    def setUp(self):
        """Prepare a seeded stand-in trainer with checkpoint-related settings.

        Seeds torch, merges checkpoint / training / scheduler overrides onto
        ``configs/defaults.yaml``, and attaches a deep copy of the config, a
        model, a grad scaler, an Adam optimizer and an ``LRSchedulerCallback``
        to ``self.trainer``.
        """
        import argparse

        torch.manual_seed(1234)
        # A Namespace is a cheap object that accepts arbitrary attributes
        self.trainer = argparse.Namespace()

        checkpoint_overrides = {
            "save_git_details": False,
            "reset": {
                "optimizer": False,
                "counts": False,
                "all": False,
                "fp16_scaler": False,
            },
            "pretrained_state_mapping": {"base_test": "base"},
            "max_to_keep": 5,
        }
        training_overrides = {
            "checkpoint_interval": 1,
            "early_stop": {"criteria": "val/total_loss", "minimize": True},
            "lr_scheduler": True,
        }
        scheduler_overrides = {
            "type": "multi_step",
            "params": {
                "use_warmup": False,
                "lr_steps": [10, 20],
                "lr_ratio": 0.1,
                "warmup_factor": 1.0,
            },
        }

        defaults = load_yaml(os.path.join("configs", "defaults.yaml"))
        self.config = OmegaConf.merge(
            defaults,
            {
                "model": "simple",
                "model_config": {},
                "checkpoint": checkpoint_overrides,
                "config_override": None,
                "training": training_overrides,
                "scheduler": scheduler_overrides,
            },
        )

        # The trainer gets its own deep copy; self.config stays as the
        # original for comparison in tests.
        trainer = self.trainer
        trainer.config = deepcopy(self.config)

        trainer.model = SimpleModule()
        trainer.scaler = torch.cuda.amp.GradScaler()
        trainer.optimizer = torch.optim.Adam(trainer.model.parameters(), lr=1e-01)
        trainer.lr_scheduler_callback = LRSchedulerCallback(self.config, trainer)
Ejemplo n.º 6
0
    def is_zoo_path(self, path) -> bool:
        """Tell whether ``path`` selects an entry in the model zoo config.

        The zoo config is the YAML file named by the ``model_zoo`` MMF env
        entry. It is put in struct and read-only mode before the lookup.

        Returns:
            bool: True if ``OmegaConf.select`` yields a non-None node for
            ``path``; False if it yields None or raises an OmegaConf error.
        """
        from mmf.utils.configuration import get_mmf_env, load_yaml

        zoo = load_yaml(get_mmf_env(key="model_zoo"))
        OmegaConf.set_struct(zoo, True)
        OmegaConf.set_readonly(zoo, True)

        try:
            selected = OmegaConf.select(zoo, path)
        except omegaconf.errors.OmegaConfBaseException:
            return False
        return selected is not None
Ejemplo n.º 7
0
 def _create_checkpoint_file(self, path):
     """Save a checkpoint at ``path`` that carries its config inside it.

     The source model lives in
     ``<home>/<data_dir>/models/mmbt.hateful_memes.images``, where
     ``data_dir`` comes from the MMF env. Its ``config.yaml`` is loaded and
     stored under the ``"config"`` key of the checkpoint read from
     ``model.pth`` before the checkpoint is written to ``path``.
     """
     folder = os.path.join(
         str(Path.home()),
         get_mmf_env(key="data_dir"),
         "models",
         "mmbt.hateful_memes.images",
     )
     weights_path = os.path.join(folder, "model.pth")
     embedded_config = load_yaml(os.path.join(folder, "config.yaml"))

     with PathManager.open(weights_path, "rb") as stream:
         checkpoint = torch.load(stream)

     checkpoint["config"] = embedded_config
     torch.save(checkpoint, path)
Ejemplo n.º 8
0
def _load_pretrained_model(model_name_or_path, *args, **kwargs):
    """Load the checkpoint and config of a pretrained model.

    Args:
        model_name_or_path: an existing path (per ``PathManager.exists``), or
            a name passed to ``download_pretrained_model``.
        *args, **kwargs: forwarded to ``download_pretrained_model``.

    Returns:
        dict with keys ``"config"`` (the model's own config), ``"checkpoint"``
        (the ``"model"`` entry of the loaded checkpoint if present, otherwise
        the checkpoint itself) and ``"full_config"``.

    Raises:
        AssertionError: if the folder has more than one yaml file, does not
            have exactly one checkpoint file, has no config in either the
            folder or the checkpoint, or (for a path input) the model config
            has more than one key.
    """
    model_name = model_name_or_path
    if PathManager.exists(model_name_or_path):
        download_path = model_name_or_path
    else:
        download_path = download_pretrained_model(
            model_name_or_path, *args, **kwargs
        )

    yaml_files = glob.glob(os.path.join(download_path, "*.yaml"))
    assert len(yaml_files) <= 1, (
        "Multiple yaml files with the pretrained model. "
        "MMF doesn't know what to do."
    )

    # Collect every file in the folder whose extension is an allowed one
    patterns = [f"*{ext}" for ext in ALLOWED_CHECKPOINT_EXTS]
    ckpt_files = [
        match
        for pattern in patterns
        for match in glob.glob(os.path.join(download_path, pattern))
    ]
    assert (
        len(ckpt_files) == 1
    ), "None or multiple checkpoints files. MMF doesn't know what to do."

    _hack_imports()

    with PathManager.open(ckpt_files[0], "rb") as stream:
        ckpt = torch.load(stream, map_location=lambda storage, loc: storage)

    # A yaml next to the checkpoint wins; otherwise the checkpoint must
    # carry its own config.
    if yaml_files:
        config = load_yaml(yaml_files[0])
    else:
        assert "config" in ckpt, (
            "No configs provided with pretrained model"
            " while checkpoint also doesn't have configuration."
        )
        config = ckpt["config"]

    model_config = config.get("model_config", config)
    ckpt = ckpt.get("model", ckpt)

    if PathManager.exists(model_name):
        # Input was a path: the model key is unknown, so there must be
        # exactly one entry to pick.
        assert len(model_config.keys()) == 1, "Checkpoint contains more than one model?"
        only_key = list(model_config.keys())[0]
        model_config = model_config[only_key]
    else:
        # Input was a name: the key is the last path component up to its
        # first dot.
        zoo_key = model_name.split(os.path.sep)[-1].split(".")[0]
        model_config = model_config.get(zoo_key)

    return {"config": model_config, "checkpoint": ckpt, "full_config": config}
Ejemplo n.º 9
0
def download_pretrained_model(model_name, *args, **kwargs):
    """Download a zoo model's resources and return the local folder path.

    The entry for ``model_name`` is looked up in the YAML named by the
    ``model_zoo`` MMF env entry. If the entry lacks ``version`` or
    ``resources``, its ``defaults`` child is used and the target folder gets
    a ``.defaults`` suffix. Entries listed under ``zoo_requirements`` (a
    string or a list) are downloaded first by recursive calls.

    Args:
        model_name: key of the model inside the zoo config.
        *args, **kwargs: forwarded unchanged to the recursive calls.

    Returns:
        Path of the folder under ``<data_dir>/models`` for this model.

    Raises:
        omegaconf.errors.OmegaConfBaseException: re-raised when the zoo
            lookup or the ``defaults`` access fails.
    """
    import omegaconf
    from omegaconf import OmegaConf

    from mmf.utils.configuration import load_yaml, get_mmf_env

    zoo = load_yaml(get_mmf_env(key="model_zoo"))
    OmegaConf.set_struct(zoo, True)
    OmegaConf.set_readonly(zoo, True)

    models_root = os.path.join(get_absolute_path(get_mmf_env("data_dir")), "models")
    target_dir = os.path.join(models_root, model_name)

    try:
        entry = OmegaConf.select(zoo, model_name)
    except omegaconf.errors.OmegaConfBaseException as e:
        print(f"No such model name {model_name} defined in mmf zoo")
        raise e

    # NOTE(review): depending on the OmegaConf version, ``select`` may return
    # None for a missing key instead of raising, in which case the ``in``
    # test below would fail with a TypeError — confirm.
    has_release_info = "version" in entry and "resources" in entry
    if not has_release_info:
        # No version/resources on the entry itself: fall back to its defaults
        try:
            entry = entry.defaults
            target_dir = os.path.join(models_root, model_name + ".defaults")
        except omegaconf.errors.OmegaConfBaseException as e:
            print(
                f"Model name {model_name} doesn't specify 'resources' and 'version' "
                "while no defaults have been provided"
            )
            raise e

    # "zoo_requirements" may be a single name or a list of names
    if "zoo_requirements" in entry:
        required = entry.zoo_requirements
        required = [required] if isinstance(required, str) else required
        for dependency in required:
            download_pretrained_model(dependency, *args, **kwargs)

    version = entry.version
    resources = entry.resources

    # Only the master process downloads; all processes then wait at the
    # synchronize() call.
    if is_master():
        download_resources(resources, target_dir, version)
    synchronize()

    return target_dir
Ejemplo n.º 10
0
def load_pretrained_model(model_name_or_path, *args, **kwargs):
    """Load the checkpoint and config of a pretrained model.

    ``model_name_or_path`` may be an existing checkpoint file, an existing
    folder, or a name that is passed to ``download_pretrained_model``.

    Args:
        model_name_or_path: existing path (per ``PathManager.exists``) or a
            model name to download.
        *args, **kwargs: forwarded to ``download_pretrained_model``.

    Returns:
        dict with keys ``"config"`` (the entry of the model config keyed by
        the last path component up to its first dot), ``"checkpoint"`` (the
        ``"model"`` entry of the loaded checkpoint if present, otherwise the
        checkpoint itself) and ``"full_config"``.

    Raises:
        AssertionError: if more than one yaml file is found, if there is not
            exactly one checkpoint file, or if no config is available from
            either the folder or the checkpoint.
    """
    # If this is a file, then load this directly else download and load
    if PathManager.exists(model_name_or_path):
        download_path = model_name_or_path
        model_name = model_name_or_path
    else:
        download_path = download_pretrained_model(model_name_or_path, *args,
                                                  **kwargs)
        model_name = model_name_or_path

    # Look for the yaml next to the checkpoint. When download_path is itself
    # a folder, that is the folder (the checkpoint glob below looks inside
    # it); taking dirname() here would search the parent folder instead.
    if os.path.isdir(download_path):
        config_folder_path = download_path
    else:
        config_folder_path = os.path.dirname(download_path)
    configs = glob.glob(os.path.join(config_folder_path, "*.yaml"))
    assert len(configs) <= 1, (
        "Multiple yaml files with the pretrained model. " +
        "MMF doesn't know what to do.")

    ckpts = []
    allowed_ckpt_types = ("*.ckpt", "*.pth", "*.pt")
    for ckpt_type in allowed_ckpt_types:
        if download_path.endswith(ckpt_type.split("*")[1]):
            # download_path is the checkpoint file itself
            ckpts.extend(glob.glob(download_path))
        else:
            ckpts.extend(glob.glob(os.path.join(download_path, ckpt_type)))

    assert (
        len(ckpts) == 1
    ), "None or multiple checkpoints files. MMF doesn't know what to do."

    _hack_imports()

    ckpt = torch.load(ckpts[0], map_location=lambda storage, loc: storage)
    # If configs are not present, will ckpt provide the config?
    if len(configs) == 0:
        assert "config" in ckpt, (
            "No configs provided with pretrained model "
            " while checkpoint also doesn't have configuration.")
        config = ckpt["config"]
    else:
        config = load_yaml(configs[0])

    model_config = config.get("model_config", config)
    ckpt = ckpt.get("model", ckpt)
    # Also handle the case of model_name is path
    model_config = model_config.get(
        model_name.split(os.path.sep)[-1].split(".")[0])

    return {"config": model_config, "checkpoint": ckpt, "full_config": config}
Ejemplo n.º 11
0
    def setUp(self):
        """Prepare a stand-in trainer and a ``LogisticsCallback`` under test.

        Creates a temp dir used as ``env.save_dir``, merges test overrides
        onto ``configs/defaults.yaml``, registers a deep copy of the config,
        sets up logging and a mocked ``Report``, then fills ``self.trainer``
        with the attributes set below and builds ``self.cb``.
        """
        self.tmpdir = tempfile.mkdtemp()
        self.trainer = argparse.Namespace()

        training_overrides = {
            "checkpoint_interval": 1,
            "evaluation_interval": 10,
            "early_stop": {"criteria": "val/total_loss"},
            "batch_size": 16,
            "log_interval": 10,
            "logger_level": "info",
        }
        defaults = load_yaml(os.path.join("configs", "defaults.yaml"))
        self.config = OmegaConf.merge(
            defaults,
            {
                "model": "simple",
                "model_config": {},
                "training": training_overrides,
                "env": {"save_dir": self.tmpdir},
            },
        )

        # The trainer gets its own deep copy; self.config stays as the
        # original for comparison in tests.
        trainer = self.trainer
        trainer.config = deepcopy(self.config)
        registry.register("config", trainer.config)
        setup_logger()

        report = Mock(spec=Report)
        report.dataset_name = "abcd"
        report.dataset_type = "test"
        self.report = report

        trainer.model = SimpleModule()
        trainer.val_loader = torch.utils.data.DataLoader(
            NumbersDataset(), batch_size=self.config.training.batch_size
        )
        trainer.optimizer = torch.optim.Adam(trainer.model.parameters(), lr=1e-01)

        trainer.device = "cpu"
        trainer.num_updates = 0
        trainer.current_iteration = 0
        trainer.current_epoch = 0
        trainer.max_updates = 0
        trainer.meter = Meter()

        self.cb = LogisticsCallback(self.config, trainer)
Ejemplo n.º 12
0
    def setUp(self):
        """Prepare a stand-in trainer whose callbacks come from the config.

        Merges test overrides (including one ``test_callback`` entry under
        ``training.callbacks``) onto ``configs/defaults.yaml``, registers a
        deep copy of the config, attaches a built ``SimpleModel``, a
        validation loader, an Adam optimizer and an ``LRSchedulerCallback``,
        then instantiates each configured callback via the registry.
        """
        self.trainer = argparse.Namespace()

        training_overrides = {
            "lr_scheduler": True,
            "lr_ratio": 0.1,
            "lr_steps": [1, 2],
            "use_warmup": False,
            "callbacks": [{"type": "test_callback", "params": {}}],
        }
        defaults = load_yaml(os.path.join("configs", "defaults.yaml"))
        self.config = OmegaConf.merge(
            defaults,
            {
                "model": "simple",
                "model_config": {},
                "training": training_overrides,
            },
        )

        # The trainer gets its own deep copy; self.config stays as the
        # original for comparison in tests.
        trainer = self.trainer
        trainer.config = deepcopy(self.config)
        registry.register("config", trainer.config)

        simple_model = SimpleModel(SimpleModel.Config())
        simple_model.build()
        trainer.model = simple_model

        trainer.val_loader = torch.utils.data.DataLoader(
            NumbersDataset(2), batch_size=self.config.training.batch_size
        )
        trainer.optimizer = torch.optim.Adam(trainer.model.parameters(), lr=1e-01)
        trainer.lr_scheduler_callback = LRSchedulerCallback(self.config, trainer)

        # The list is attached before any callback is constructed, so it
        # already exists on the trainer each callback receives.
        trainer.callbacks = []
        for spec in self.config.training.get("callbacks", []):
            callback_cls = registry.get_callback_class(spec.type)
            instance = callback_cls(trainer.config, trainer, **spec.params)
            trainer.callbacks.append(instance)
Ejemplo n.º 13
0
    def test_caption_bleu4(self):
        """Check ``CaptionBleu4Metric`` on a full match and on a partial match.

        A ``CaptionProcessor`` built from the coco dataset defaults (with a
        ``random`` vocab pointed at the test ``vocab.txt``) is registered as
        ``coco_caption_processor`` before the metric is computed.
        """
        this_file = os.path.abspath(__file__)

        # abspath() collapses the ".." components, which also drops the
        # file name itself from the joined path.
        config_path = os.path.abspath(
            os.path.join(
                this_file, "../../../mmf/configs/datasets/coco/defaults.yaml"
            )
        )
        coco_config = load_yaml(config_path).dataset_config.coco
        processor_config = coco_config.processors.caption_processor

        vocab_file = os.path.abspath(
            os.path.join(this_file, "..", "..", "data", "vocab.txt")
        )
        processor_config.params.vocab.type = "random"
        processor_config.params.vocab.vocab_file = vocab_file

        registry.register(
            "coco_caption_processor", CaptionProcessor(processor_config.params)
        )

        metric = metrics.CaptionBleu4Metric()
        expected = Sample()
        predicted = dict()

        def all_fours():
            # Reference answers: every token id is 4
            answers = torch.empty((5, 5, 10))
            answers.fill_(4)
            return answers

        # Test complete match: token 4 is predicted at every position
        expected.answers = all_fours()
        full_scores = torch.zeros((5, 10, 19))
        full_scores[:, :, 4] = 1.0
        predicted["scores"] = full_scores

        self.assertEqual(metric.calculate(expected, predicted).item(), 1.0)

        # Test partial match: first five positions predict 4, the rest 18
        expected.answers = all_fours()
        partial_scores = torch.zeros((5, 10, 19))
        partial_scores[:, 0:5, 4] = 1.0
        partial_scores[:, 5:, 18] = 1.0
        predicted["scores"] = partial_scores

        self.assertAlmostEqual(
            metric.calculate(expected, predicted).item(), 0.3928, 4
        )
Ejemplo n.º 14
0
 def test_save_config(self):
     """Constructing a ``Checkpoint`` should leave a matching ``config.yaml``.

     The file is read back from the temp dir yielded by
     ``mock_env_with_temp`` and compared with both the test's config and the
     trainer's config.
     """
     with mock_env_with_temp() as tmp_dir:
         Checkpoint(self.trainer)
         saved = load_yaml(os.path.join(tmp_dir, "config.yaml"))
         for reference in (self.config, self.trainer.config):
             self.assertTrue(saved == reference)
Ejemplo n.º 15
0
 def _test_zoo_for_keys(self, path):
     """Load the zoo yaml at ``path`` and run ``_recurse_on_config`` over it."""
     self._recurse_on_config(load_yaml(path))
Ejemplo n.º 16
0
 def _test_zoo(self, path: str, callback: typing.Callable):
     """Load the zoo yaml at ``path`` and recurse over it with ``callback``."""
     loaded = load_yaml(path)
     self._recurse_on_config(loaded, callback=callback)
 def _get_config(self, path):
     """Load the yaml found at ``path`` resolved against this file's path.

     ``path`` is joined onto the absolute path of this file and the result is
     normalized with ``os.path.abspath`` before loading.
     """
     resolved = os.path.abspath(os.path.join(os.path.abspath(__file__), path))
     return load_yaml(resolved)
Ejemplo n.º 18
0
    def __init__(
        self,
        num_train_data,
        max_updates,
        max_epochs,
        config=None,
        optimizer=None,
        update_frequency=1,
        batch_size=1,
        batch_size_per_device=None,
        fp16=False,
        on_update_end_fn=None,
        scheduler_config=None,
        grad_clipping_config=None,
        tensorboard=False,
    ):
        """Set up a mock trainer around a ``SimpleModel``.

        Args:
            num_train_data: stored as ``self.num_data``.
            max_updates: written to ``training.max_updates`` unless None.
            max_epochs: written to ``training.max_epochs`` unless None.
            config: config to use. If None, ``configs/defaults.yaml`` is
                loaded and merged with training overrides built from the
                other arguments. Otherwise ``batch_size``, ``fp16``,
                ``update_frequency`` and ``tensorboard`` are written into
                ``config.training`` in place.
            optimizer: optimizer to use; a ``MagicMock`` with mocked
                ``step`` / ``zero_grad`` is used when None.
            update_frequency: value for ``training.update_frequency``.
            batch_size: value for ``training.batch_size``.
            batch_size_per_device: value for
                ``training.batch_size_per_device``; only applied when
                ``config`` is None.
            fp16: value for ``training.fp16``.
            on_update_end_fn: callable used as ``self.on_update_end``. When
                falsy, the LR scheduler callback's ``on_update_end`` is used
                if a scheduler is configured, otherwise a ``MagicMock``.
            scheduler_config: when truthy, enables the LR scheduler, stores
                this under ``scheduler`` in the config and creates an
                ``LRSchedulerCallback``.
            grad_clipping_config: when truthy, enables gradient clipping
                using its ``max_grad_l2_norm`` and ``clip_norm_mode`` items.
            tensorboard: value for ``training.tensorboard``.
        """
        if config is None:
            self.config = load_yaml("configs/defaults.yaml")
            self.config = OmegaConf.merge(
                self.config,
                {
                    "training": {
                        "detect_anomaly": False,
                        "evaluation_interval": 10000,
                        "update_frequency": update_frequency,
                        "fp16": fp16,
                        "batch_size": batch_size,
                        "batch_size_per_device": batch_size_per_device,
                        "tensorboard": tensorboard,
                        "run_type": "train",
                        "num_workers": 0,
                    },
                    "datasets": "",
                    "model": "",
                },
            )
            self.training_config = self.config.training
        else:
            config.training.batch_size = batch_size
            config.training.fp16 = fp16
            config.training.update_frequency = update_frequency
            config.training.tensorboard = tensorboard
            self.training_config = config.training
            self.config = config

        registry.register("config", self.config)

        if max_updates is not None:
            self.training_config["max_updates"] = max_updates
        if max_epochs is not None:
            self.training_config["max_epochs"] = max_epochs
        self.model = SimpleModel({"in_dim": 1})
        self.model.build()
        if torch.cuda.is_available():
            self.model = self.model.cuda()
            self.device = "cuda"
        else:
            self.device = "cpu"
        self.distributed = False

        if optimizer is None:
            self.optimizer = MagicMock()
            self.optimizer.step = MagicMock(return_value=None)
            self.optimizer.zero_grad = MagicMock(return_value=None)
        else:
            self.optimizer = optimizer

        if scheduler_config:
            # Use self.config rather than the ``config`` argument: the
            # argument is None when the defaults were loaded above, while
            # self.config is the same object whenever one was passed in.
            self.config.training.lr_scheduler = True
            self.config.scheduler = scheduler_config
            self.lr_scheduler_callback = LRSchedulerCallback(self.config, self)
            # NOTE(review): self.callbacks is not created in this method;
            # presumably the class or a base class provides it — confirm.
            self.callbacks.append(self.lr_scheduler_callback)
            on_update_end_fn = (on_update_end_fn if on_update_end_fn else
                                self.lr_scheduler_callback.on_update_end)
        if grad_clipping_config:
            self.training_config.clip_gradients = True
            self.training_config.max_grad_l2_norm = grad_clipping_config[
                "max_grad_l2_norm"]
            self.training_config.clip_norm_mode = grad_clipping_config[
                "clip_norm_mode"]

        # Hooks are mocked so tests can inspect how they were called
        self.on_batch_start = MagicMock(return_value=None)
        self.on_update_start = MagicMock(return_value=None)
        self.logistics_callback = MagicMock(return_value=None)
        self.logistics_callback.log_interval = MagicMock(return_value=None)
        self.on_batch_end = MagicMock(return_value=None)
        self.on_update_end = (on_update_end_fn if on_update_end_fn else
                              MagicMock(return_value=None))
        self.on_validation_start = MagicMock(return_value=None)
        self.scaler = torch.cuda.amp.GradScaler(enabled=False)
        self.early_stop_callback = MagicMock(return_value=None)
        self.on_validation_end = MagicMock(return_value=None)
        self.metrics = MagicMock(return_value={})

        self.num_data = num_train_data
        self.run_type = self.config.get("run_type", "train")