def setUp(self) -> None:
    """Prepare a scratch clone with a README and remember its parsed metadata.

    Creates a temporary directory, builds a ``Repository`` cloned from
    ``{USER}/{REPO_NAME}`` inside it, commits ``DUMMY_MODELCARD_EVAL_RESULT``
    as ``README.md`` and stores the YAML parsed from that card in
    ``self.existing_metadata``.
    """
    scratch_root = Path(tempfile.mkdtemp())
    self.repo_path = scratch_root
    self.REPO_NAME = repo_name()

    self.repo = Repository(
        scratch_root / self.REPO_NAME,
        clone_from=f"{USER}/{self.REPO_NAME}",
        use_auth_token=self._token,
        git_user="******",
        git_email="*****@*****.**",
    )

    # "README.md" is a relative path; presumably `commit()` switches the
    # working directory into the clone -- confirm against Repository.commit.
    with self.repo.commit("Add README to main branch"), open(
        "README.md", "w+"
    ) as readme:
        readme.write(DUMMY_MODELCARD_EVAL_RESULT)

    # `.strip("---")` drops leading/trailing "-" characters so that only the
    # YAML body of the card is handed to the parser.
    front_matter = DUMMY_MODELCARD_EVAL_RESULT.strip().strip("---")
    self.existing_metadata = yaml.safe_load(front_matter)
def test_init_from_existing_local_clone(self):
    """Wrap a directory that was cloned with plain ``git`` in a ``Repository``.

    The clone is made with the ``git`` CLI first; the test then checks that
    ``Repository`` can be built on that directory without ``clone_from`` and
    that the LFS and pull commands run without raising.
    """
    clone_command = ["git", "clone", self._repo_url, WORKING_REPO_DIR]
    subprocess.run(clone_command, check=True)

    local_repo = Repository(WORKING_REPO_DIR)
    local_repo.lfs_track(["*.pdf"])
    local_repo.lfs_enable_largefiles()
    local_repo.git_pull()
def test_add_commit_push(self):
    """Add, commit and push two files, then check the returned commit URL.

    The test clones ``self._repo_url`` into ``WORKING_REPO_DIR``, writes two
    small files, runs ``git_add`` / ``git_commit`` / ``git_push`` and finally
    issues a HEAD request against the URL returned by ``git_push``.

    Raises:
        subprocess.CalledProcessError: re-raised (after printing its stderr)
            when the push fails.
    """
    repo = Repository(
        WORKING_REPO_DIR,
        clone_from=self._repo_url,
        use_auth_token=self._token,
        git_user="******",
        git_email="*****@*****.**",
    )

    # Create dummy files
    # (original note: one is lfs-tracked, the other is not).
    with open(os.path.join(WORKING_REPO_DIR, "dummy.txt"), "w") as f:
        f.write("hello")
    with open(os.path.join(WORKING_REPO_DIR, "model.bin"), "w") as f:
        f.write("hello")

    repo.git_add()
    repo.git_commit()
    try:
        url = repo.git_push()
    except subprocess.CalledProcessError as exc:
        # Surface git's stderr in the test log, then propagate the original
        # exception with its traceback untouched.
        print(exc.stderr)
        raise

    # Check that the returned commit url actually exists.
    # An explicit timeout keeps the test from hanging forever if the
    # endpoint never answers.
    r = requests.head(url, timeout=10)
    r.raise_for_status()
def push_to_hub(
    save_directory: Optional[str],
    model_id: Optional[str] = None,
    repo_url: Optional[str] = None,
    commit_message: Optional[str] = "add model",
    organization: Optional[str] = None,
    private: Optional[bool] = None,
) -> str:
    """
    Push the contents of `save_directory` to a model repo on the hub.

    When `repo_url` is not given, a repo is created (or reused, since
    `exist_ok=True` is passed) through `HfApi().create_repo` using the token
    returned by `HfFolder.get_token()`.

    Parameters:
        save_directory (:obj:`Union[str, os.PathLike]`):
            Directory having model weights & config.
        model_id (:obj:`str`, `optional`, defaults to :obj:`save_directory`):
            Repo name in huggingface_hub. If not specified, repo name will be same as `save_directory`
        repo_url (:obj:`str`, `optional`):
            Specify this in case you want to push to existing repo in hub.
        commit_message (:obj:`str`, `optional`, defaults to :obj:`add model`):
            Message to commit while pushing
        organization (:obj:`str`, `optional`):
            Organization in which you want to push your model.
        private (:obj:`bool`, `optional`):
            Whether the model repo should be private (requires a paid huggingface.co account)

    Returns:
        url to commit on remote repo.
    """
    if model_id is None:
        model_id = save_directory

    token = HfFolder.get_token()
    if repo_url is None:
        # No explicit target: create the repo, or reuse an existing one.
        repo_url = HfApi().create_repo(
            token,
            model_id,
            organization=organization,
            private=private,
            repo_type=None,
            exist_ok=True,
        )

    repo = Repository(save_directory, clone_from=repo_url, use_auth_token=token)

    return repo.push_to_hub(commit_message=commit_message)
def test_init_clone_in_nonempty_folder(self):
    """Build a ``Repository`` with ``clone_from`` on a directory that already holds files.

    Two small files are written into ``WORKING_REPO_DIR`` before the
    ``Repository`` is constructed; the LFS and pull commands are then expected
    to run without raising.
    """
    os.makedirs(WORKING_REPO_DIR, exist_ok=True)

    # Pre-populate the target directory so that it is not empty.
    for filename in ("dummy.txt", "model.bin"):
        with open(os.path.join(WORKING_REPO_DIR, filename), "w") as handle:
            handle.write("hello")

    cloned = Repository(WORKING_REPO_DIR, clone_from=self._repo_url)
    cloned.lfs_track(["*.pdf"])
    cloned.lfs_enable_largefiles()
    cloned.git_pull()
class RepocardUpdateTest(unittest.TestCase):
    """Tests for ``metadata_update`` against a per-test repo cloned from ``{USER}/{REPO_NAME}``."""

    _api = HfApi(endpoint=ENDPOINT_STAGING)

    @classmethod
    def setUpClass(cls):
        """
        Share this valid token in all tests below.
        """
        cls._token = TOKEN
        cls._api.set_access_token(TOKEN)

    def _remote_repo_id(self) -> str:
        """Return the ``user/name`` id of the repo used by the current test."""
        return f"{USER}/{self.REPO_NAME}"

    def _pull_and_load_metadata(self):
        """Pull the local clone and return the metadata loaded from its README."""
        self.repo.git_pull()
        return metadata_load(self.repo_path / self.REPO_NAME / "README.md")

    @retry_endpoint
    def setUp(self) -> None:
        """Clone the test repo, commit the dummy model card and parse its metadata."""
        scratch_root = Path(tempfile.mkdtemp())
        self.repo_path = scratch_root
        self.REPO_NAME = repo_name()

        self.repo = Repository(
            scratch_root / self.REPO_NAME,
            clone_from=self._remote_repo_id(),
            use_auth_token=self._token,
            git_user="******",
            git_email="*****@*****.**",
        )

        with self.repo.commit("Add README to main branch"), open(
            "README.md", "w+"
        ) as readme:
            readme.write(DUMMY_MODELCARD_EVAL_RESULT)

        # `.strip("---")` removes the leading/trailing "-" characters around
        # the YAML body of the card.
        front_matter = DUMMY_MODELCARD_EVAL_RESULT.strip().strip("---")
        self.existing_metadata = yaml.safe_load(front_matter)

    def tearDown(self) -> None:
        """Delete the remote repo and remove the temporary directory."""
        self._api.delete_repo(repo_id=f"{self.REPO_NAME}", token=self._token)
        shutil.rmtree(self.repo_path)

    def test_update_dataset_name(self):
        """A new ``datasets`` field is merged into the existing metadata."""
        datasets_patch = {"datasets": "['test/test_dataset']"}
        metadata_update(self._remote_repo_id(), datasets_patch, token=self._token)

        actual = self._pull_and_load_metadata()

        expected = copy.deepcopy(self.existing_metadata)
        expected.update(datasets_patch)
        self.assertDictEqual(actual, expected)

    def test_update_existing_result_with_overwrite(self):
        """With ``overwrite=True`` an existing metric value is replaced."""
        changed = copy.deepcopy(self.existing_metadata)
        first_metric = changed["model-index"][0]["results"][0]["metrics"][0]
        first_metric["value"] = 0.2862102282047272

        metadata_update(
            self._remote_repo_id(),
            changed,
            token=self._token,
            overwrite=True,
        )

        actual = self._pull_and_load_metadata()
        self.assertDictEqual(actual, changed)

    def test_update_existing_result_without_overwrite(self):
        """With ``overwrite=False`` changing an existing metric raises ``ValueError``."""
        changed = copy.deepcopy(self.existing_metadata)
        first_metric = changed["model-index"][0]["results"][0]["metrics"][0]
        first_metric["value"] = 0.2862102282047272

        expected_error = (
            "You passed a new value for the existing metric 'name: Accuracy, type:"
            " accuracy'. Set `overwrite=True` to overwrite existing metrics."
        )
        with pytest.raises(ValueError, match=expected_error):
            metadata_update(
                self._remote_repo_id(),
                changed,
                token=self._token,
                overwrite=False,
            )

    def test_update_existing_field_without_overwrite(self):
        """With ``overwrite=False`` changing an existing field raises ``ValueError``."""
        first_patch = {"datasets": "['test/test_dataset']"}
        metadata_update(self._remote_repo_id(), first_patch, token=self._token)

        second_patch = {"datasets": "['test/test_dataset_2']"}
        expected_error = (
            "You passed a new value for the existing meta data field 'datasets'."
            " Set `overwrite=True` to overwrite existing metadata."
        )
        with pytest.raises(ValueError, match=expected_error):
            metadata_update(
                self._remote_repo_id(),
                second_patch,
                token=self._token,
                overwrite=False,
            )

    def test_update_new_result_existing_dataset(self):
        """A new metric for an already-listed dataset is appended to its metrics."""
        recall_result = metadata_eval_result(
            model_pretty_name="RoBERTa fine-tuned on ReactionGIF",
            task_pretty_name="Text Classification",
            task_id="text-classification",
            metrics_pretty_name="Recall",
            metrics_id="recall",
            metrics_value=0.7762102282047272,
            dataset_pretty_name="ReactionGIF",
            dataset_id="julien-c/reactiongif",
        )
        metadata_update(
            self._remote_repo_id(),
            recall_result,
            token=self._token,
            overwrite=False,
        )

        expected = copy.deepcopy(self.existing_metadata)
        added_metric = recall_result["model-index"][0]["results"][0]["metrics"][0]
        expected["model-index"][0]["results"][0]["metrics"].append(added_metric)

        actual = self._pull_and_load_metadata()
        self.assertDictEqual(actual, expected)

    def test_update_new_result_new_dataset(self):
        """A result for a dataset not yet listed is appended to the results."""
        jpeg_result = metadata_eval_result(
            model_pretty_name="RoBERTa fine-tuned on ReactionGIF",
            task_pretty_name="Text Classification",
            task_id="text-classification",
            metrics_pretty_name="Accuracy",
            metrics_id="accuracy",
            metrics_value=0.2662102282047272,
            dataset_pretty_name="ReactionJPEG",
            dataset_id="julien-c/reactionjpeg",
        )
        metadata_update(
            self._remote_repo_id(),
            jpeg_result,
            token=self._token,
            overwrite=False,
        )

        expected = copy.deepcopy(self.existing_metadata)
        added_result = jpeg_result["model-index"][0]["results"][0]
        expected["model-index"][0]["results"].append(added_result)

        actual = self._pull_and_load_metadata()
        self.assertDictEqual(actual, expected)
def test_init_clone_in_empty_folder(self):
    """Build a ``Repository`` with ``clone_from`` and run the LFS and pull commands.

    Nothing is written to ``WORKING_REPO_DIR`` beforehand; the test passes
    when none of the calls raises.
    """
    cloned = Repository(WORKING_REPO_DIR, clone_from=self._repo_url)

    cloned.lfs_track(["*.pdf"])
    cloned.lfs_enable_largefiles()
    cloned.git_pull()
def test_init_failure(self):
    """``Repository`` on a fresh temporary directory without ``clone_from`` must raise ``ValueError``."""
    with tempfile.TemporaryDirectory() as scratch_dir, self.assertRaises(ValueError):
        Repository(scratch_dir)
def push_to_hub_fastai(
    learner,
    repo_id: str,
    commit_message: Optional[str] = "Add model",
    private: Optional[bool] = None,
    token: Optional[str] = None,
    config: Optional[dict] = None,
    **kwargs,
):
    """
    Save a fastai learner into a local clone of `repo_id` and push it to the Hub.

    The repo is created through `HfApi.create_repo` with `exist_ok=True`,
    cloned into a local directory named `repo_id`, pulled with rebase, filled
    by `_save_pretrained_fastai` and finally pushed.

    Args:
        learner (`Learner`):
            The `fastai.Learner` you'd like to push to the Hub.
        repo_id (`str`):
            The repository id for your model in Hub in the format of "namespace/repo_name". The namespace can be
            your individual account or an organization to which you have write access (for example,
            'stanfordnlp/stanza-de'). It is also used as the local directory of the clone.
        commit_message (`str`, *optional*, defaults to `"Add model"`):
            Message to commit while pushing.
        private (`bool`, *optional*):
            Whether or not the repository created should be private.
        token (`str`, *optional*):
            The Hugging Face account token. If `None`, the token returned by `HfFolder.get_token()` is used.
        config (`dict`, *optional*):
            Configuration object to be saved alongside the model weights.

    Keyword Args:
        api_endpoint (`str`, *optional*):
            The API endpoint to use when pushing the model to the hub.
        git_user (`str`, *optional*):
            Will override the ``git config user.name`` for committing and pushing files to the hub.
        git_email (`str`, *optional*):
            Will override the ``git config user.email`` for committing and pushing files to the hub.

    Returns:
        The value returned by `Repository.push_to_hub` (presumably the url of the commit in the repository).

    Raises:
        ValueError: if no token is passed and `HfFolder.get_token()` returns `None`.
    """
    _check_fastai_fastcore_versions()

    endpoint = kwargs.get("api_endpoint")
    committer_name = kwargs.get("git_user")
    committer_email = kwargs.get("git_email")

    # Fall back to the stored token only when the caller supplied none.
    if token is None:
        token = HfFolder.get_token()
    if token is None:
        raise ValueError(
            "You must login to the Hugging Face Hub. There are two options: "
            "(1) Type `huggingface-cli login` in your terminal and enter your token. "
            "(2) Enter your token in the `token` argument. "
            "Your token is available in the Settings of your Hugging Face account. "
        )

    # Create the repo (an existing one is accepted because of `exist_ok=True`).
    hub_api = HfApi(endpoint=endpoint)
    repo_url = hub_api.create_repo(
        repo_id,
        token=token,
        private=private,
        repo_type=None,
        exist_ok=True,
    )

    # Clone into the local directory `repo_id` and pull (with rebase) before
    # writing the learner files into it.
    local_clone = Repository(
        repo_id,
        clone_from=repo_url,
        use_auth_token=token,
        git_user=committer_name,
        git_email=committer_email,
    )
    local_clone.git_pull(rebase=True)

    _save_pretrained_fastai(learner, repo_id, config=config)

    return local_clone.push_to_hub(commit_message=commit_message)