def test_filter_models_with_cardData(self):
    _api = HfApi()
    models = _api.list_models(filter="co2_eq_emissions", cardData=True)
    self.assertTrue(all(hasattr(model, "cardData") for model in models))
    models = _api.list_models(filter="co2_eq_emissions")
    self.assertTrue(
        all(not hasattr(model, "cardData") for model in models))
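
Outside the test suite, the same call pattern can be used directly; a minimal sketch, assuming the huggingface_hub version exercised by these tests:

from huggingface_hub import HfApi

api = HfApi()
# cardData=True asks the Hub to attach the model card metadata to each result
models = api.list_models(filter="co2_eq_emissions", cardData=True)
for model in models[:5]:
    # cardData is only present when cardData=True was passed
    print(model.modelId, getattr(model, "cardData", None))
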
    def test_filter_models_by_language(self):
        _api = HfApi()
        f_fr = ModelFilter(language="fr")
        res_fr = _api.list_models(filter=f_fr)

        f_en = ModelFilter(language="en")
        res_en = _api.list_models(filter=f_en)

        assert len(res_fr) != len(res_en)
 def test_filter_models_with_task(self):
     _api = HfApi()
     f = ModelFilter(task="fill-mask", model_name="albert-base-v2")
     models = _api.list_models(filter=f)
     self.assertTrue("fill-mask" == models[0].pipeline_tag)
     self.assertTrue("albert-base-v2" in models[0].modelId)
     f = ModelFilter(task="dummytask")
     models = _api.list_models(filter=f)
     self.assertGreater(1, len(models))
 def test_filter_models_with_library(self):
     _api = HfApi()
     f = ModelFilter("microsoft",
                     model_name="wavlm-base-sd",
                     library="tensorflow")
     models = _api.list_models(filter=f)
     self.assertGreater(1, len(models))
     f = ModelFilter("microsoft",
                     model_name="wavlm-base-sd",
                     library="pytorch")
     models = _api.list_models(filter=f)
     self.assertGreater(len(models), 0)
 def test_filter_emissions_with_min(self):
     _api = HfApi()
     models = _api.list_models(emissions_thresholds=(5, None),
                               cardData=True)
     self.assertTrue(
         all([
             model.cardData["co2_eq_emissions"] >= 5 for model in models
             if isinstance(model.cardData["co2_eq_emissions"], (float, int))
         ]))
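
The emissions_thresholds argument used above takes a (min, max) tuple of CO2-equivalent values, with None leaving a bound open; a minimal standalone sketch, assuming cardData is returned as a plain dict as the test indexes it:

from huggingface_hub import HfApi

api = HfApi()
# Only models reporting at least 5 units of CO2-eq emissions in their card data
models = api.list_models(emissions_thresholds=(5, None), cardData=True)
for model in models[:5]:
    print(model.modelId, model.cardData.get("co2_eq_emissions"))
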
 def test_list_models_with_config(self):
     _api = HfApi()
     models = _api.list_models(filter="adapter-transformers",
                               fetch_config=True,
                               limit=20)
     found_configs = 0
     for model in models:
         if model.config:
             found_configs = found_configs + 1
     self.assertGreater(found_configs, 0)
Example #7
from typing import List

from huggingface_hub import HfApi


def remove_prefix(text: str, prefix: str) -> str:
    # Assumed helper equivalent to str.removeprefix: strip `prefix` from `text` if present
    return text[len(prefix):] if text.startswith(prefix) else text


def find_pretrained_model(src_lang: str, tgt_lang: str) -> List[str]:
    """Find models that can accept src_lang as input and return tgt_lang as output."""
    prefix = "Helsinki-NLP/opus-mt-"
    api = HfApi()
    model_list = api.list_models()
    model_ids = [x.modelId for x in model_list if x.modelId.startswith("Helsinki-NLP")]
    src_and_targ = [
        remove_prefix(m, prefix).lower().split("-") for m in model_ids if "+" not in m
    ]  # model names containing "+" can't be loaded
    matching = [f"{prefix}{a}-{b}" for (a, b) in src_and_targ if src_lang in a and tgt_lang in b]
    return matching
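
A hypothetical call, assuming the usual Helsinki-NLP/opus-mt-{src}-{tgt} naming scheme on the Hub:

# Might print something like ['Helsinki-NLP/opus-mt-en-fr', ...]
print(find_pretrained_model("en", "fr"))
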
 def test_list_models_complex_query(self):
     # Let's list the 10 most recent models
     # with tags "bert" and "jax",
     # ordered by last modified date.
     _api = HfApi()
     models = _api.list_models(filter=("bert", "jax"),
                               sort="lastModified",
                               direction=-1,
                               limit=10)
     # we expect more than one model, and at most 10
     self.assertGreater(len(models), 1)
     self.assertLessEqual(len(models), 10)
     model = models[0]
     self.assertIsInstance(model, ModelInfo)
     self.assertTrue(all(tag in model.tags for tag in ["bert", "jax"]))
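
The same complex query can be issued outside a test; a minimal sketch, assuming the huggingface_hub version used above:

from huggingface_hub import HfApi

api = HfApi()
# 10 most recently modified models tagged with both "bert" and "jax"
models = api.list_models(filter=("bert", "jax"),
                         sort="lastModified",
                         direction=-1,
                         limit=10)
for model in models:
    print(model.modelId, model.tags)
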
 def test_filter_models_with_complex_query(self):
     _api = HfApi()
     args = ModelSearchArguments()
     f = ModelFilter(
         task=args.pipeline_tag.TextClassification,
         library=[args.library.PyTorch, args.library.TensorFlow],
     )
     models = _api.list_models(filter=f)
     self.assertGreater(len(models), 1)
     self.assertTrue(all(
         "text-classification" in model.pipeline_tag
         or "text-classification" in model.tags for model in models))
     self.assertTrue(all(
         "pytorch" in model.tags and "tf" in model.tags for model in models))
Example #10
class FlairModelHub:
    """
    A class for interacting with the HF model hub API, and searching for Flair models by name or task

    You can optionally include your HuggingFace login for authorized access (but it is not required)
    """
    def __init__(self, username=None, password=None):
        self.api = HfApi()
        if username and password:
            self.token = self.api.login(username, password)
        elif username or password:
            print(
                'Only a username or password was entered. You should include both to get authorized access'
            )
        self.models = self.api.list_models('flair') + FLAIR_MODELS

    def _format_results(self,
                        results: list,
                        as_dict=False,
                        user_uploaded=False
                        ) -> (List[HFModelResult], Dict[str, HFModelResult]):
        """
        Takes raw HuggingFace API results and makes them easier to read and work with
        """
        results = apply(FlairModelResult, results)
        if not user_uploaded:
            results = [
                r for r in results
                if 'flair/' in r.name or 'flairNLP/' in r.name
            ]
        if as_dict:
            dicts = apply(Self.to_dict(), results)
            results = {m['model_name']: m for m in dicts}
        return results

    def search_model_by_name(
        self,
        name: str,
        as_dict=False,
        user_uploaded=False
    ) -> (List[HFModelResult], Dict[str, HFModelResult]):
        """
        Searches HuggingFace Model API for all flair models containing `name` and returns a list of `HFModelResults`

        Optionally can return all models as `dict` rather than a list

        If `user_uploaded` is False, will only return models originating from Flair (such as flair/chunk-english-fast)

        Usage:
          ```python
          hub = FlairModelHub()
          hub.search_model_by_name('flair/chunk-english-fast')
          ```
        """
        models = [m for m in self.models if name in m.modelId]
        return self._format_results(models, as_dict, user_uploaded)

    def search_model_by_task(
        self,
        task: str,
        as_dict=False,
        user_uploaded=False
    ) -> (List[HFModelResult], Dict[str, HFModelResult]):
        """
        Searches HuggingFace Model API for all flair models for `task` and returns a list of `HFModelResults`

        Optionally can return all models as `dict` rather than a list

        If `user_uploaded` is False, will only return models originating from Flair (such as flair/chunk-english-fast)

        Usage:
        ```python
            hub = FlairModelHub()
            hub.search_model_by_task('ner')
            # OR: #
            hub.search_model_by_task(FLAIR_TASKS.NAMED_ENTITY_RECOGNITION)
        ```
        """
        if (task not in _flair_tasks.values()) and (task != ''):
            raise ValueError(f'''`{task}` is not a valid task.

            Please choose a valid one available from Flair: (https://huggingface.co/flair)
            Or with the `FLAIR_TASKS` object''')
        models = [
            m for m in self.models
            if task in m.modelId or task == m.pipeline_tag
        ]
        return self._format_results(models, as_dict, user_uploaded)
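
A usage sketch based on the docstrings above; the model name and task value are illustrative:

hub = FlairModelHub()
# Name search; only official flair/ and flairNLP/ models are returned unless user_uploaded=True
chunkers = hub.search_model_by_name('flair/chunk-english-fast')
# Task search; accepts either the raw tag or a FLAIR_TASKS attribute
ner_models = hub.search_model_by_task('ner', as_dict=True)
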
Example #11
class HFModelHub:
    """
    A class for interacting with the HF model hub API, and searching for models by name or task

    You can optionally include your HuggingFace login for authorized access (but it is not required)
    """
    def __init__(self, username=None, password=None):
        self.api = HfApi()
        if username and password:
            self.token = self.api.login(username, password)
        elif username or password:
            print(
                'Only a username or password was entered. You should include both to get authorized access'
            )

    def _format_results(self,
                        results: list,
                        as_dict=False,
                        user_uploaded=False
                        ) -> (List[HFModelResult], Dict[str, HFModelResult]):
        """
        Takes raw HuggingFace API results and makes them easier to read and work with
        """
        results = apply(HFModelResult, results)
        if not user_uploaded:
            results = [r for r in results if '/' not in r.name]
        if as_dict:
            dicts = apply(Self.to_dict(), results)
            results = {m['model_name']: m for m in dicts}
        return results

    def search_model_by_task(
        self,
        task: str,
        as_dict=False,
        user_uploaded=False
    ) -> (List[HFModelResult], Dict[str, HFModelResult]):
        """
        Searches HuggingFace Model API for all pretrained models relating to `task` and returns a list of HFModelResults

        Optionally can return all models as a `dict` rather than a list

        If `user_uploaded` is False, will only return models originating in HuggingFace (such as distilgpt2)

        Usage:
        ```python
          hub = HFModelHub()
          hub.search_model_by_task('summarization')
          # OR #
          hub.search_model_by_task(HF_TASKS.SUMMARIZATION)
        ```
        """
        if task not in _hf_tasks.values():
            raise ValueError(f'''`{task}` is not a valid task.

            Please choose a valid one available from HuggingFace: (https://huggingface.co/transformers/task_summary.html)
            Or with the `HF_TASKS` object''')
        models = self.api.list_models(task)
        return self._format_results(models, as_dict, user_uploaded)

    def search_model_by_name(
        self,
        name: str,
        as_dict=False,
        user_uploaded=False
    ) -> (List[HFModelResult], Dict[str, HFModelResult]):
        """
        Searches HuggingFace Model API for all pretrained models containing `name` and returns a list of HFModelResults

        Optionally can return all models as `dict` rather than a list

        If `user_uploaded` is False, will only return models originating from HuggingFace (such as distilgpt2)

        Usage:
          ```python
          hub = HFModelHub()
          hub.search_model_by_name('gpt2')
          ```
        """
        if user_uploaded:
            models = self.api.list_models()
            models = self._format_results(models, as_dict, user_uploaded)
            models = [m for m in models if name in m.name]

        else:
            models = self.api.list_models(name)
            models = self._format_results(models, as_dict, user_uploaded)
        return models
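
A usage sketch for HFModelHub, mirroring the docstring examples above; the task and model names are illustrative:

hub = HFModelHub()
# Task search is validated against _hf_tasks before querying the API
summarizers = hub.search_model_by_task('summarization')
# Name search; user_uploaded=True also includes community models (names containing "/")
gpt2_variants = hub.search_model_by_name('gpt2', user_uploaded=True)
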
 def test_filter_models_by_author_and_name(self):
     # Test we can search by an author and a model name and find it
     _api = HfApi()
     f = ModelFilter("facebook", model_name="bart-base")
     models = _api.list_models(filter=f)
     self.assertTrue("facebook/bart-base" in models[0].modelId)
 def test_failing_filter_models_by_author_and_model_name(self):
     # Test we can search by an author and a name, but the model is not found
     _api = HfApi()
     f = ModelFilter(author="muellerzr", model_name="testme")
     models = _api.list_models(filter=f)
     self.assertEqual(len(models), 0)
 def test_filter_models_by_author(self):
     _api = HfApi()
     f = ModelFilter(author="muellerzr")
     models = _api.list_models(filter=f)
     self.assertGreater(len(models), 0)
     self.assertTrue("muellerzr" in models[0].modelId)
 def test_staging_list_models(self):
     _api = HfApi(endpoint=ENDPOINT_STAGING)
     _ = _api.list_models()
 def test_list_models(self):
     _api = HfApi()
     models = _api.list_models()
     self.assertGreater(len(models), 100)
     self.assertIsInstance(models[0], ModelInfo)
 def test_list_models_search(self):
     _api = HfApi()
     models = _api.list_models(search="bert")
     self.assertGreater(len(models), 10)
     self.assertIsInstance(models[0], ModelInfo)
     [self.assertTrue("bert" in model.modelId.lower()) for model in models]
 def test_list_models_author(self):
     _api = HfApi()
     models = _api.list_models(author="google")
     self.assertGreater(len(models), 10)
     self.assertIsInstance(models[0], ModelInfo)
     [self.assertTrue("google" in model.author for model in models)]