예제 #1
0
    def download_fasttext_models(self) -> None:
        """Download the fastText model file for this instance.

        The URL is obtained from ``self._build_fasttext_url()`` and the
        destination passed to the downloader is ``self.model_fp``. When
        ``self.interactive`` is true, the user is asked for permission
        before anything is fetched; otherwise the download starts right
        away and a notice is printed unless ``self.silent`` is set.

        Raises:
            CLTKException: If the download is declined in interactive mode.

        TODO: Add tests
        TODO: Implement ``overwrite``
        TODO: error out better or continue to _load_model?
        """
        model_url = self._build_fasttext_url()
        if self.interactive:
            print(  # pragma: no cover
                "CLTK message: This part of the CLTK depends upon word embedding models from the Fasttext project."
            )  # pragma: no cover
            dl_is_allowed = query_yes_no(
                f"Do you want to download file '{model_url}' to '{self.model_fp}'?"
            )  # type: bool
            if not dl_is_allowed:
                # The message names fastText (not Stanza) so that the user
                # can tell which dependency was refused.
                raise CLTKException(
                    f"Download of necessary fastText model declined for '{self.iso_code}'. Unable to continue with fastText's processing."
                )
        elif not self.silent:
            print(
                f"CLTK message: Going to download file '{model_url}' to '{self.model_fp}' ..."
            )  # pragma: no cover
        # Reached in non-interactive mode or after the user has agreed.
        get_file_with_progress_bar(model_url=model_url, file_path=self.model_fp)
예제 #2
0
 def _download_model(self) -> None:
     """Fetch the Stanza models through ``stanza.download``.

     In interactive mode the user is asked for permission first and a
     refusal raises ``CLTKException``. In non-interactive mode the
     download starts immediately, preceded by a notice unless
     ``self.silent`` is set. Afterwards ``self.model_path`` must exist.

     Raises:
         CLTKException: If the download is declined in interactive mode.
         FileNotFoundError: If ``self.model_path`` is still missing after
             the download attempt.
     """
     if self.interactive:
         print(  # pragma: no cover
             "CLTK message: This part of the CLTK depends upon the Stanza NLP library."
         )  # pragma: no cover
         user_agreed = query_yes_no(
             f"CLTK message: Allow download of Stanza models to ``{self.model_path}``?"
         )  # type: bool
         if not user_agreed:
             raise CLTKException(
                 f"Download of necessary Stanza model declined for '{self.language}'. Unable to continue with Stanza's processing."
             )
     elif not self.silent:
         print(
             f"CLTK message: Going to download required Stanza models to ``{self.model_path}`` ..."
         )  # pragma: no cover

     # Either non-interactive mode or the user has agreed.
     stanza.download(lang=self.stanza_code, package=self.treebank)

     # The download attempt must have produced the model file.
     if file_exists(self.model_path):
         return
     missing_msg = "Missing required models for ``stanza`` at ``{0}``."
     raise FileNotFoundError(missing_msg.format(self.model_path))
예제 #3
0
 def _download_nlpl_models(self) -> None:
     """Download the NLPL Word2Vec archive for this instance.

     The URL is looked up in ``self.MAP_LANG_TO_URL`` by ``self.iso_code``
     and the destination passed to the downloader is ``self.fp_zip``.
     When ``self.interactive`` is true, the user is asked for permission
     before anything is fetched; otherwise the download starts right away
     and a notice is printed unless ``self.silent`` is set.

     Raises:
         KeyError: If ``self.iso_code`` has no entry in
             ``self.MAP_LANG_TO_URL`` (assuming it is a plain mapping).
         CLTKException: If the download is declined in interactive mode.
     """
     model_url = self.MAP_LANG_TO_URL[self.iso_code]
     if self.interactive:
         print(  # pragma: no cover
             "CLTK message: This part of the CLTK depends upon word embedding models from the NLPL project."
         )  # pragma: no cover
         dl_is_allowed = query_yes_no(
             f"Do you want to download file '{model_url}' to '{self.fp_zip}'?"
         )  # type: bool
         if not dl_is_allowed:
             # The message names NLPL (not Stanza) and uses ``iso_code``,
             # the attribute this method already relies on for the lookup.
             raise CLTKException(
                 f"Download of necessary NLPL model declined for '{self.iso_code}'. Unable to continue with Word2Vec's processing."
             )
     elif not self.silent:
         print(
             f"CLTK message: Going to download file '{model_url}' to '{self.fp_zip}' ..."
         )  # pragma: no cover
     # Reached in non-interactive mode or after the user has agreed.
     get_file_with_progress_bar(model_url=model_url, file_path=self.fp_zip)