Example #1
0
def upload_dict(model: str, files_mapping: Dict[str, str], username: str = HUGGINGFACE_USERNAME):
    """
    Push an explicit selection of local files to a huggingface repository.

    You must already be logged in through the CLI,
    ```
    huggingface-cli login
    ```

    The repository creation is attempted first; if it raises, the exception is
    only logged as a warning and the uploads are still attempted.

    Parameters
    ----------
    model: str
        repository name; files are uploaded to `{username}/{model}`.
    files_mapping: Dict[str, str]
        {local_file: target_file}, every pair is uploaded in iteration order.
    username: str, optional (default=os.environ.get('HUGGINGFACE_USERNAME', 'huseinzol05'))
        namespace used to build the repository id.
    """
    try:
        create_repo(name=model)
    except Exception as err:
        # Best effort: log and carry on to the uploads.
        logger.warning(err)

    repo_id = f'{username}/{model}'

    for file, file_remote in files_mapping.items():
        upload_file(
            path_or_fileobj=file,
            path_in_repo=file_remote,
            repo_id=repo_id,
        )
        logger.info(f'Uploaded from {file} to {repo_id}/{file_remote}')
Example #2
0
def upload(model: str, directory: str, username: str = HUGGINGFACE_USERNAME):
    """
    Upload the files directly inside a local directory to a huggingface repository,
    make sure already login using CLI,
    ```
    huggingface-cli login
    ```

    Only regular files at the top level of `directory` are uploaded, each under
    its own base name. Entries that are not regular files (for example
    sub-directories) are skipped with a warning instead of being handed to
    `upload_file`.

    The repository creation is attempted first; if it raises, the exception is
    only logged as a warning and the uploads are still attempted.

    Parameters
    ----------
    model: str
        it will become repository name.
    directory: str
        local directory with files in it.
    username: str, optional (default=os.environ.get('HUGGINGFACE_USERNAME', 'huseinzol05'))
        namespace used to build the repository id `{username}/{model}`.
    """
    try:
        create_repo(name=model)
    except Exception as e:
        # Best effort: log and carry on to the uploads.
        logger.warning(e)

    repo_id = f'{username}/{model}'

    for file in glob(os.path.join(directory, '*')):
        # The '*' pattern also matches sub-directories; `upload_file` takes a
        # single file, so skip anything that is not one rather than aborting
        # the whole batch part-way through.
        if not os.path.isfile(file):
            logger.warning(f'Skipping {file}, not a regular file')
            continue

        file_remote = os.path.basename(file)
        upload_file(path_or_fileobj=file,
                    path_in_repo=file_remote,
                    repo_id=repo_id)
        logger.info(f'Uploading from {file} to {repo_id}/{file_remote}')
Example #3
0
    def _get_repo_url_from_name(
        repo_name: str,
        organization: Optional[str] = None,
        private: bool = None,
        use_auth_token: Optional[Union[bool, str]] = None,
    ) -> str:
        """
        Resolve an auth token from `use_auth_token`, then return whatever `create_repo` returns for `repo_name`.

        Token resolution:
        - a string `use_auth_token` is used as the token itself;
        - any other truthy value makes the token come from `HfFolder.get_token()`;
        - a falsy value means `None` is passed as the token.

        `create_repo` is always called with `repo_type=None` and `exist_ok=True`.

        Raises:
            ValueError: when `use_auth_token` is truthy (and not a string) but `HfFolder.get_token()` returns `None`.
        """
        token = None
        if isinstance(use_auth_token, str):
            token = use_auth_token
        elif use_auth_token:
            token = HfFolder.get_token()
            if token is None:
                raise ValueError(
                    "You must login to the Hugging Face hub on this computer by typing `transformers-cli login` and "
                    "entering your credentials to use `use_auth_token=True`. Alternatively, you can pass your own "
                    "token as the `use_auth_token` argument.")

        # NOTE(review): an earlier comment here mentioned a special provision for the test endpoint (CI);
        # no such handling is visible in this block -- confirm whether it is still needed.
        repo_options = dict(
            organization=organization,
            private=private,
            repo_type=None,
            exist_ok=True,
        )
        return create_repo(token, repo_name, **repo_options)
Example #4
0
    def setup(self, components: List[Component], flagging_dir: str):
        """
        Create the dataset repository (if needed), clone it under `flagging_dir`, pull it,
        and record the paths used for logging on the instance.

        Params:
        components (List[Component]): stored on the instance as `self.components`.
        flagging_dir (str): local directory where the dataset is cloned,
        updated, and pushed from.

        Raises:
        ImportError: if `huggingface_hub` cannot be imported.
        """
        # Imported lazily so the dependency is only required when this method runs.
        # ModuleNotFoundError is a subclass of ImportError, so one clause covers both.
        try:
            import huggingface_hub
        except ImportError:
            raise ImportError(
                "Package `huggingface_hub` not found is needed "
                "for HuggingFaceDatasetSaver. Try 'pip install huggingface_hub'."
            )

        # NOTE(review): the token attribute is spelled `hf_foken`; it is defined outside this
        # method, so the spelling is kept as-is here.
        repo_url = huggingface_hub.create_repo(
            name=self.dataset_name,
            token=self.hf_foken,
            private=self.dataset_private,
            repo_type="dataset",
            exist_ok=True,
        )
        # e.g. "https://huggingface.co/datasets/abidlabs/test-audio-10"
        self.path_to_dataset_repo = repo_url
        self.components = components
        self.flagging_dir = flagging_dir

        local_clone = os.path.join(flagging_dir, self.dataset_name)
        self.dataset_dir = local_clone
        self.repo = huggingface_hub.Repository(
            local_dir=local_clone,
            clone_from=repo_url,
            use_auth_token=self.hf_foken,
        )
        self.repo.git_pull()

        # Should filename be user-specified?
        self.log_file = os.path.join(local_clone, "data.csv")
        self.infos_file = os.path.join(local_clone, "dataset_infos.json")
Example #5
0
    def _create_repo(
        self,
        repo_id: str,
        private: Optional[bool] = None,
        use_auth_token: Optional[Union[bool, str]] = None,
        repo_url: Optional[str] = None,
        organization: Optional[str] = None,
    ):
        """
        Create the repo if needed, clean up `repo_id` using the deprecated kwargs `repo_url` and `organization`,
        and retrieve the token.

        Args:
            repo_id: identifier of the repo to create.
            private: forwarded to `create_repo`.
            use_auth_token: when exactly `True`, the token is read from `HfFolder.get_token()`; any other value
                is forwarded to `create_repo` unchanged as the token.
            repo_url: deprecated. When given, `repo_id` is replaced by `repo_url` with the
                `HUGGINGFACE_CO_RESOLVE_ENDPOINT` prefix stripped.
            organization: deprecated. When given and `repo_id` is not already under the `organization/`
                namespace, any existing namespace is dropped and `organization/` is prepended.

        Returns:
            A `(repo_id, token)` tuple: the cleaned-up (and, if needed, namespaced) repo id and the token that
            was passed to `create_repo`.
        """
        if repo_url is not None:
            warnings.warn(
                "The `repo_url` argument is deprecated and will be removed in v5 of Transformers. Use `repo_id` "
                "instead.")
            repo_id = repo_url.replace(f"{HUGGINGFACE_CO_RESOLVE_ENDPOINT}/",
                                       "")
        if organization is not None:
            warnings.warn(
                "The `organization` argument is deprecated and will be removed in v5 of Transformers. Set your "
                "organization directly in the `repo_id` passed instead (`repo_id={organization}/{model_id}`)."
            )
            # Match the full namespace segment ("<organization>/"), not a bare string prefix: otherwise a
            # repo such as "my-org-model" or "my-org-2/model" would be mistaken for one already under "my-org".
            if not repo_id.startswith(f"{organization}/"):
                if "/" in repo_id:
                    repo_id = repo_id.split("/")[-1]
                repo_id = f"{organization}/{repo_id}"

        token = HfFolder.get_token(
        ) if use_auth_token is True else use_auth_token
        url = create_repo(repo_id=repo_id,
                          token=token,
                          private=private,
                          exist_ok=True)

        # If the namespace is not there, add it or `upload_file` will complain
        if "/" not in repo_id and url != f"{HUGGINGFACE_CO_RESOLVE_ENDPOINT}/{repo_id}":
            repo_id = get_full_repo_name(repo_id, token=token)
        return repo_id, token