Example #1
    def load_requirements(self, *args, **kwargs):
        # Only the master process downloads; synchronize() then acts as a
        # barrier so no worker proceeds before the files exist.
        if is_master():
            requirements = self.config.get("zoo_requirements", [])
            if isinstance(requirements, str):
                requirements = [requirements]
            for item in requirements:
                download_pretrained_model(item, *args, **kwargs)
        synchronize()
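The method above reads its download list from a zoo_requirements key and accepts either a single identifier or a list. A minimal sketch of a config that would trigger the downloads (OmegaConf is an assumption here, since MMF configs support the same .get() access; the zoo names are borrowed from the other examples on this page):

from omegaconf import OmegaConf

# Either a bare string or a list works, since load_requirements
# normalizes a string into a one-element list.
config = OmegaConf.create(
    {"zoo_requirements": ["detectron.vmb_weights",
                          "unimodal_text.hateful_memes.bert"]}
)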
Example #2
    def __init__(self, in_dim, weights_file, bias_file, model_data_dir, *args,
                 **kwargs):
        super().__init__()
        model_data_dir = get_absolute_path(model_data_dir)

        # Resolve relative weight/bias paths against the model data directory.
        if not os.path.isabs(weights_file):
            weights_file = os.path.join(model_data_dir, weights_file)
        if not os.path.isabs(bias_file):
            bias_file = os.path.join(model_data_dir, bias_file)

        # Fall back to the zoo weights if either file is missing locally.
        if not PathManager.exists(bias_file) or not PathManager.exists(
                weights_file):
            download_path = download_pretrained_model("detectron.vmb_weights")
            weights_file = get_absolute_path(
                os.path.join(download_path, "fc7_w.pkl"))
            bias_file = get_absolute_path(
                os.path.join(download_path, "fc7_b.pkl"))

        with PathManager.open(weights_file, "rb") as w:
            weights = pickle.load(w)
        with PathManager.open(bias_file, "rb") as b:
            bias = pickle.load(b)
        out_dim = bias.shape[0]

        # Build the linear layer and copy in the pretrained fc7 parameters.
        self.lc = nn.Linear(in_dim, out_dim)
        self.lc.weight.data.copy_(torch.from_numpy(weights))
        self.lc.bias.data.copy_(torch.from_numpy(bias))
        self.out_dim = out_dim
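A hedged usage sketch for the module above, assuming the snippet is wrapped in a class named Fc7Layer (a placeholder; the real class name is not shown here) and that the pickled fc7 weights are available:

import torch

# Dimensions and paths are illustrative only.
layer = Fc7Layer(
    in_dim=2048,
    weights_file="fc7_w.pkl",
    bias_file="fc7_b.pkl",
    model_data_dir="/path/to/model_data",
)
features = torch.rand(4, 2048)     # batch of pooled image features
projected = layer.lc(features)     # shape: (4, layer.out_dim)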
Example #3
def _load_pretrained_model(model_name_or_path, *args, **kwargs):
    if PathManager.exists(model_name_or_path):
        download_path = model_name_or_path
        model_name = model_name_or_path
    else:
        download_path = download_pretrained_model(model_name_or_path, *args, **kwargs)
        model_name = model_name_or_path

    _hack_imports()

    ckpt_path = get_ckpt_path_from_folder(download_path)
    ckpt = get_ckpt_from_path(ckpt_path)

    # If configs are not present, will ckpt provide the config?
    config = get_config_from_folder_or_ckpt(download_path, ckpt)
    model_config = config.get("model_config", config)
    ckpt = ckpt.get("model", ckpt)

    # Also handle the case where model_name is a path
    if PathManager.exists(model_name):
        # This shouldn't happen
        assert len(model_config.keys()) == 1, "Checkpoint contains more than one model?"
        # Take first key
        model_config = model_config[list(model_config.keys())[0]]
    else:
        model_config = model_config.get(model_name.split(os.path.sep)[-1].split(".")[0])

    return {"config": model_config, "checkpoint": ckpt, "full_config": config}
Example #4
def _load_pretrained_model(model_name_or_path, *args, **kwargs):
    if PathManager.exists(model_name_or_path):
        download_path = model_name_or_path
        model_name = model_name_or_path
    else:
        download_path = download_pretrained_model(model_name_or_path, *args,
                                                  **kwargs)
        model_name = model_name_or_path

    configs = glob.glob(os.path.join(download_path, "*.yaml"))
    assert len(configs) <= 1, (
        "Multiple yaml files with the pretrained model. " +
        "MMF doesn't know what to do.")

    ckpts = []
    allowed_ckpt_types = [f"*{ext}" for ext in ALLOWED_CHECKPOINT_EXTS]
    for ckpt_type in allowed_ckpt_types:
        ckpts.extend(glob.glob(os.path.join(download_path, ckpt_type)))

    assert (
        len(ckpts) == 1
    ), "None or multiple checkpoints files. MMF doesn't know what to do."

    _hack_imports()

    with PathManager.open(ckpts[0], "rb") as f:
        ckpt = torch.load(f, map_location=lambda storage, loc: storage)
    # If configs are not present, will ckpt provide the config?
    if len(configs) == 0:
        assert "config" in ckpt, (
            "No configs provided with pretrained model"
            " while checkpoint also doesn't have configuration.")
        config = ckpt["config"]
    else:
        config = load_yaml(configs[0])
    model_config = config.get("model_config", config)
    ckpt = ckpt.get("model", ckpt)

    # Also handle the case where model_name is a path
    if PathManager.exists(model_name):
        # This shouldn't happen
        assert (
            len(model_config.keys()) == 1
        ), "Checkpoint contains more than one model?"
        # Take first key
        model_config = model_config[list(model_config.keys())[0]]
    else:
        model_config = model_config.get(
            model_name.split(os.path.sep)[-1].split(".")[0])

    return {"config": model_config, "checkpoint": ckpt, "full_config": config}
Example #5
def load_pretrained_model(model_name_or_path, *args, **kwargs):
    # If this is a file, then load this directly else download and load
    if PathManager.exists(model_name_or_path):
        download_path = model_name_or_path
        model_name = model_name_or_path
    else:
        download_path = download_pretrained_model(model_name_or_path, *args,
                                                  **kwargs)
        model_name = model_name_or_path
    config_folder_path = os.path.dirname(download_path)
    configs = glob.glob(os.path.join(config_folder_path, "*.yaml"))
    assert len(configs) <= 1, (
        "Multiple yaml files with the pretrained model. " +
        "MMF doesn't know what to do.")

    ckpts = []
    allowed_ckpt_types = ("*.ckpt", "*.pth", "*.pt")
    for ckpt_type in allowed_ckpt_types:
        if download_path.endswith(ckpt_type.split("*")[1]):
            ckpts.extend(glob.glob(download_path))
        else:
            ckpts.extend(glob.glob(os.path.join(download_path, ckpt_type)))

    assert (
        len(ckpts) == 1
    ), "None or multiple checkpoints files. MMF doesn't know what to do."

    _hack_imports()

    ckpt = torch.load(ckpts[0], map_location=lambda storage, loc: storage)
    # If configs are not present, will ckpt provide the config?
    if len(configs) == 0:
        assert "config" in ckpt, (
            "No configs provided with pretrained model "
            " while checkpoint also doesn't have configuration.")
        config = ckpt["config"]
    else:
        config = load_yaml(configs[0])

    model_config = config.get("model_config", config)
    ckpt = ckpt.get("model", ckpt)
    # Also handle the case where model_name is a path
    model_config = model_config.get(
        model_name.split(os.path.sep)[-1].split(".")[0])

    return {"config": model_config, "checkpoint": ckpt, "full_config": config}
Example #6
    def test_load_resume_zoo_parity_with_mmf(self):
        # Not specifying checkpoint.resume but specifying
        # checkpoint.resume_zoo: the trainer should load the model file
        # that backs the zoo entry.
        resume_zoo = "unimodal_text.hateful_memes.bert"
        ckpt_filepath = download_pretrained_model(resume_zoo)
        ckpt_filepath = get_ckpt_path_from_folder(ckpt_filepath)
        ckpt = torch.load(ckpt_filepath, map_location="cpu")

        ckpt_config = {"resume_zoo": resume_zoo, "zoo_config_override": True}

        with mock_env_with_temp("mmf.utils.checkpoint.get_mmf_env") as _:
            mmf_trainer = self._get_mmf_trainer(
                ckpt_config=ckpt_config,
                model_config=unimodal_text_model_config,
                max_updates=0,
            )
            mmf_trainer.on_init_start()
            mmf_ckpt = mmf_trainer.model.state_dict()
            mmf_ckpt.pop("base.encoder.embeddings.position_ids")
            self._assert_same_dict(ckpt, mmf_ckpt)

        with mock_env_with_temp(
                "mmf.trainers.lightning_trainer.get_mmf_env") as _:
            # Lightning loads from the zoo; in this case the zoo
            # checkpoint is already in MMF format.
            lightning = self._get_lightning_trainer(
                ckpt_config=ckpt_config,
                model_config=unimodal_text_model_config,
                max_steps=0,
                seed=4,
            )
            lightning.trainer.fit(lightning.model,
                                  train_dataloaders=lightning.train_loader)
            lightning_ckpt = lightning.model.state_dict()
            lightning_ckpt.pop("base.encoder.embeddings.position_ids")
            self._assert_same_dict(ckpt, lightning_ckpt)
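_assert_same_dict is a test helper whose implementation is not shown; a minimal sketch of the parity check it presumably performs on the two state dicts:

import torch

def assert_same_state_dict(expected, actual):
    # Hypothetical stand-in for the _assert_same_dict helper above.
    assert expected.keys() == actual.keys()
    for key in expected:
        assert torch.equal(expected[key], actual[key]), key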
Example #7
    def load_requirements(self, config, *args, **kwargs):
        # Variant of Example #1 without the distributed guard: the caller
        # downloads the zoo requirements directly.
        requirements = config.get("zoo_requirements", [])
        if isinstance(requirements, str):
            requirements = [requirements]
        for item in requirements:
            download_pretrained_model(item, *args, **kwargs)
Example #8
    def get_checkpoint_data(self) -> Dict[str, Any]:
        """This function gets the checkpoint file path on disk from
        config.trainer.params.resume_from_checkpoint. If that is not
        specified, it gets the checkpoint path from config.checkpoint:
        when config.resume is set, it takes the latest checkpoint from the
        config's save directory (or the best checkpoint, if
        config.resume_best is True); when config.resume is not set, it
        falls back to config.resume_file or the checkpoint file from
        config.resume_zoo, in that order.

        Returns:
            Dict[str, Any]: a dict containing the following keys:
            `checkpoint_path` (str) local file path of the checkpoint;
            `ckpt` (Dict[str, Any]) the loaded checkpoint;
            `is_zoo` (bool) whether or not the checkpoint was specified
                through a zoo identifier;
            `config` (Dict[str, Any]) the config stored together with this
                checkpoint.
        """
        # get ckpt file path from config.trainer.params.resume_from_checkpoint
        path = self.config.trainer.params.get("resume_from_checkpoint", None)
        if path is not None:
            is_zoo = self.is_zoo_path(path)
            ckpt_filepath = path
            if is_zoo:
                folder = download_pretrained_model(path)
                ckpt_filepath = get_ckpt_path_from_folder(folder)
                ckpt = get_ckpt_from_path(ckpt_filepath)
                config = get_config_from_folder_or_ckpt(folder, ckpt)
            else:
                ckpt = get_ckpt_from_path(ckpt_filepath)
                config = None

            return {
                "ckpt": ckpt,
                "checkpoint_path": ckpt_filepath,
                "is_zoo": is_zoo,
                "config": config,
            }

        is_zoo = False
        config = None
        ckpt = None
        # get ckpt file path from config.checkpoint
        ckpt_config = self.config.checkpoint
        suffix = "best.ckpt" if ckpt_config.resume_best else "current.ckpt"
        path = os.path.join(get_mmf_env(key="save_dir"), suffix)
        ckpt_filepath = None
        resume_from_specified_path = (ckpt_config.resume_file is not None
                                      or ckpt_config.resume_zoo is not None
                                      ) and (not ckpt_config.resume
                                             or not PathManager.exists(path))
        if resume_from_specified_path:
            if ckpt_config.resume_file and PathManager.exists(
                    ckpt_config.resume_file):
                ckpt_filepath = ckpt_config.resume_file
            elif ckpt_config.resume_zoo is not None:
                is_zoo = True
                folder = download_pretrained_model(ckpt_config.resume_zoo)
                ckpt_filepath = get_ckpt_path_from_folder(folder)
                ckpt = get_ckpt_from_path(ckpt_filepath)
                config = get_config_from_folder_or_ckpt(folder, ckpt)
            else:
                raise RuntimeError(f"{ckpt_config.resume_file} doesn't exist")

        if ckpt_config.resume and PathManager.exists(path):
            ckpt_filepath = path

        if ckpt_filepath is not None:
            ckpt = get_ckpt_from_path(ckpt_filepath)

        return {
            "ckpt": ckpt,
            "checkpoint_path": ckpt_filepath,
            "is_zoo": is_zoo,
            "config": config,
        }
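Putting the docstring's precedence into concrete terms, an illustrative checkpoint config (expressed with OmegaConf, which is an assumption; field names are taken from the code above, values are illustrative):

from omegaconf import OmegaConf

# Resolution order implemented by get_checkpoint_data:
#   1. trainer.params.resume_from_checkpoint (file path or zoo id)
#   2. with resume=True, <save_dir>/current.ckpt (best.ckpt if resume_best)
#   3. otherwise resume_file, then resume_zoo
checkpoint_config = OmegaConf.create({
    "resume": False,
    "resume_best": False,
    "resume_file": None,
    "resume_zoo": "unimodal_text.hateful_memes.bert",
})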