def test_init_from_existing_local_clone(self):
        """Wrap a checkout made by plain `git clone` in a `Repository`.

        The test has no explicit assertions: it passes when the clone,
        the LFS configuration calls and the pull all complete without raising.
        """
        # Clone with the git CLI first so the working directory already
        # exists when `Repository` is instantiated (no `clone_from` is given).
        clone_command = ["git", "clone", self._repo_url, WORKING_REPO_DIR]
        subprocess.run(clone_command, check=True)

        existing_clone = Repository(WORKING_REPO_DIR)
        existing_clone.lfs_track(["*.pdf"])
        existing_clone.lfs_enable_largefiles()
        existing_clone.git_pull()
 def test_init_clone_in_nonempty_folder(self):
     """Clone into a folder that already contains files, then configure LFS and pull.

     The test has no explicit assertions: it passes when none of the calls raise.
     """
     # Pre-populate the target folder with two dummy files. Per the original
     # note, one is LFS-tracked and the other is not (presumably `model.bin`
     # vs. `dummy.txt` -- TODO confirm against the repo's .gitattributes).
     os.makedirs(WORKING_REPO_DIR, exist_ok=True)
     for dummy_name in ("dummy.txt", "model.bin"):
         dummy_path = os.path.join(WORKING_REPO_DIR, dummy_name)
         with open(dummy_path, "w") as handle:
             handle.write("hello")

     cloned = Repository(WORKING_REPO_DIR, clone_from=self._repo_url)
     cloned.lfs_track(["*.pdf"])
     cloned.lfs_enable_largefiles()
     cloned.git_pull()
Beispiel #3
0
class RepocardUpdateTest(unittest.TestCase):
    """Tests for `metadata_update` run against the `ENDPOINT_STAGING` endpoint.

    Every test gets its own repository (named by `repo_name()`) whose
    README.md is seeded with `DUMMY_MODELCARD_EVAL_RESULT`. The remote
    repository is deleted in `tearDown`; the temporary local clone is removed
    through a cleanup registered in `setUp`.
    """

    _api = HfApi(endpoint=ENDPOINT_STAGING)

    @classmethod
    def setUpClass(cls):
        """
        Share this valid token in all tests below.
        """
        cls._token = TOKEN
        cls._api.set_access_token(TOKEN)

    @retry_endpoint
    def setUp(self) -> None:
        """Clone the test repository into a temp dir and commit the dummy model card."""
        self.repo_path = Path(tempfile.mkdtemp())
        # Register the removal immediately: cleanups run even when the rest of
        # setUp() or tearDown() raises, so the temp directory cannot leak.
        self.addCleanup(shutil.rmtree, self.repo_path)

        self.REPO_NAME = repo_name()
        self.repo = Repository(
            self.repo_path / self.REPO_NAME,
            clone_from=self._repo_id(),
            use_auth_token=self._token,
            git_user="******",
            git_email="*****@*****.**",
        )

        # The relative path is intentional: presumably `Repository.commit`
        # switches into the local clone for the duration of the block
        # (TODO confirm against the Repository implementation).
        with self.repo.commit("Add README to main branch"):
            with open("README.md", "w+") as f:
                f.write(DUMMY_MODELCARD_EVAL_RESULT)

        # Drop the surrounding whitespace and the leading/trailing dashes of
        # the front-matter fences so that only the YAML payload is parsed.
        # Note that `str.strip("---")` strips the *character* "-", not the
        # literal three-dash substring.
        self.existing_metadata = yaml.safe_load(
            DUMMY_MODELCARD_EVAL_RESULT.strip().strip("---")
        )

    def tearDown(self) -> None:
        """Delete the remote repository created for the test."""
        self._api.delete_repo(repo_id=self.REPO_NAME, token=self._token)

    def _repo_id(self) -> str:
        """Return the `USER/REPO_NAME` id of the repository under test."""
        return f"{USER}/{self.REPO_NAME}"

    def _pull_and_load_metadata(self):
        """Pull the local clone and return the metadata loaded from its README.md."""
        self.repo.git_pull()
        return metadata_load(self.repo_path / self.REPO_NAME / "README.md")

    def test_update_dataset_name(self):
        """A new top-level field is merged into the existing metadata."""
        new_datasets_data = {"datasets": "['test/test_dataset']"}
        metadata_update(self._repo_id(), new_datasets_data, token=self._token)

        updated_metadata = self._pull_and_load_metadata()
        expected_metadata = copy.deepcopy(self.existing_metadata)
        expected_metadata.update(new_datasets_data)
        self.assertDictEqual(updated_metadata, expected_metadata)

    def test_update_existing_result_with_overwrite(self):
        """With `overwrite=True` an existing metric value is replaced."""
        new_metadata = copy.deepcopy(self.existing_metadata)
        first_metric = new_metadata["model-index"][0]["results"][0]["metrics"][0]
        first_metric["value"] = 0.2862102282047272
        metadata_update(
            self._repo_id(),
            new_metadata,
            token=self._token,
            overwrite=True,
        )

        updated_metadata = self._pull_and_load_metadata()
        self.assertDictEqual(updated_metadata, new_metadata)

    def test_update_existing_result_without_overwrite(self):
        """Changing an existing metric value with `overwrite=False` raises ValueError."""
        new_metadata = copy.deepcopy(self.existing_metadata)
        first_metric = new_metadata["model-index"][0]["results"][0]["metrics"][0]
        first_metric["value"] = 0.2862102282047272

        with pytest.raises(
            ValueError,
            match=(
                "You passed a new value for the existing metric 'name: Accuracy, type:"
                " accuracy'. Set `overwrite=True` to overwrite existing metrics."
            ),
        ):
            metadata_update(
                self._repo_id(),
                new_metadata,
                token=self._token,
                overwrite=False,
            )

    def test_update_existing_field_without_overwrite(self):
        """Changing an existing top-level field with `overwrite=False` raises ValueError."""
        new_datasets_data = {"datasets": "['test/test_dataset']"}
        metadata_update(self._repo_id(), new_datasets_data, token=self._token)

        # Built outside the `raises` block so that only the call expected to
        # fail sits inside it.
        new_datasets_data = {"datasets": "['test/test_dataset_2']"}
        with pytest.raises(
            ValueError,
            match=(
                "You passed a new value for the existing meta data field 'datasets'."
                " Set `overwrite=True` to overwrite existing metadata."
            ),
        ):
            metadata_update(
                self._repo_id(),
                new_datasets_data,
                token=self._token,
                overwrite=False,
            )

    def test_update_new_result_existing_dataset(self):
        """A new metric is expected to be appended to the existing result entry."""
        new_result = metadata_eval_result(
            model_pretty_name="RoBERTa fine-tuned on ReactionGIF",
            task_pretty_name="Text Classification",
            task_id="text-classification",
            metrics_pretty_name="Recall",
            metrics_id="recall",
            metrics_value=0.7762102282047272,
            dataset_pretty_name="ReactionGIF",
            dataset_id="julien-c/reactiongif",
        )

        metadata_update(
            self._repo_id(),
            new_result,
            token=self._token,
            overwrite=False,
        )

        expected_metadata = copy.deepcopy(self.existing_metadata)
        expected_metadata["model-index"][0]["results"][0]["metrics"].append(
            new_result["model-index"][0]["results"][0]["metrics"][0]
        )

        updated_metadata = self._pull_and_load_metadata()
        self.assertDictEqual(updated_metadata, expected_metadata)

    def test_update_new_result_new_dataset(self):
        """A result for a new dataset is expected to be appended as a new result entry."""
        new_result = metadata_eval_result(
            model_pretty_name="RoBERTa fine-tuned on ReactionGIF",
            task_pretty_name="Text Classification",
            task_id="text-classification",
            metrics_pretty_name="Accuracy",
            metrics_id="accuracy",
            metrics_value=0.2662102282047272,
            dataset_pretty_name="ReactionJPEG",
            dataset_id="julien-c/reactionjpeg",
        )

        metadata_update(
            self._repo_id(),
            new_result,
            token=self._token,
            overwrite=False,
        )

        expected_metadata = copy.deepcopy(self.existing_metadata)
        expected_metadata["model-index"][0]["results"].append(
            new_result["model-index"][0]["results"][0]
        )

        updated_metadata = self._pull_and_load_metadata()
        self.assertDictEqual(updated_metadata, expected_metadata)
 def test_init_clone_in_empty_folder(self):
     """Clone the test repository via `Repository`, then configure LFS and pull.

     The test has no explicit assertions: it passes when none of the calls raise.
     """
     fresh_clone = Repository(WORKING_REPO_DIR, clone_from=self._repo_url)
     fresh_clone.lfs_track(["*.pdf"])
     fresh_clone.lfs_enable_largefiles()
     fresh_clone.git_pull()
def push_to_hub_fastai(
    learner,
    repo_id: str,
    commit_message: Optional[str] = "Add model",
    private: Optional[bool] = None,
    token: Optional[str] = None,
    config: Optional[dict] = None,
    **kwargs,
):
    """
    Upload learner checkpoint files to the Hub while synchronizing a local clone of the repo in
    `repo_id`.

    Args:
        learner (`Learner`):
            The `fastai.Learner` you'd like to push to the Hub.
        repo_id (`str`):
            The repository id for your model in Hub in the format of "namespace/repo_name". The namespace can be your individual account or an organization to which you have write access (for example, 'stanfordnlp/stanza-de'). The same value is used as the path of the local clone.
        commit_message (`str`, *optional*):
            Message to commit while pushing. Will default to `"Add model"`.
        private (`bool`, *optional*):
            Whether or not the repository created should be private.
        token (`str`, *optional*):
            The Hugging Face account token to use as HTTP bearer authorization for remote files. If `None`, the token returned by `HfFolder.get_token()` is used; if that is `None` as well, a `ValueError` is raised.
        config (`dict`, *optional*):
            Configuration object to be saved alongside the model weights.

    Keyword Args:
        api_endpoint (`str`, *optional*):
            The API endpoint to use when pushing the model to the hub.
        git_user (`str`, *optional*):
            Will override the ``git config user.name`` for committing and pushing files to the hub.
        git_email (`str`, *optional*):
            Will override the ``git config user.email`` for committing and pushing files to the hub.

        Any other keyword argument is accepted but ignored.

    Returns:
        The url of the commit of your model in the given repository.

    <Tip>

    Raises the following error:

        - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
          if the user is not logged in to the Hugging Face Hub.

    </Tip>
    """

    _check_fastai_fastcore_versions()

    # Each of these is `None` when the caller did not pass the keyword.
    api_endpoint: Optional[str] = kwargs.get("api_endpoint")
    git_user: Optional[str] = kwargs.get("git_user")
    git_email: Optional[str] = kwargs.get("git_email")

    # Fall back to the token returned by `HfFolder` when none is given explicitly.
    if token is None:
        token = HfFolder.get_token()

    if token is None:
        raise ValueError(
            "You must login to the Hugging Face Hub. There are two options: "
            "(1) Type `huggingface-cli login` in your terminal and enter your token. "
            "(2) Enter your token in the `token` argument. "
            "Your token is available in the Settings of your Hugging Face account. "
        )

    # Create repo using `HfApi()`; `exist_ok=True` is passed so that an
    # already existing repository is accepted.
    repo_url = HfApi(endpoint=api_endpoint).create_repo(
        repo_id,
        token=token,
        private=private,
        repo_type=None,
        exist_ok=True,
    )

    # If repository exists in the Hugging Face Hub then clone it locally in `repo_id`.
    repo = Repository(
        repo_id,
        clone_from=repo_url,
        use_auth_token=token,
        git_user=git_user,
        git_email=git_email,
    )
    # Pull (with rebase) before the new model files are written.
    repo.git_pull(rebase=True)

    # Save the learner into the local clone directory (same path as `repo_id`).
    _save_pretrained_fastai(learner, repo_id, config=config)

    return repo.push_to_hub(commit_message=commit_message)