Example #1
def _get_nlp(language="en", constituencies=False):
    """
    Get spaCY/benepar with models by language
    """
    import spacy

    language = language.lower()
    model_name = LANGUAGE_TO_MODEL.get(language, language)

    try:
        nlp = spacy.load(model_name)
    except OSError:
        from spacy.cli import download

        download(model_name)
        nlp = spacy.load(model_name)

    if language in BENEPAR_LANGUAGES and constituencies:
        from benepar.spacy_plugin import BeneparComponent

        try:
            nlp.add_pipe(BeneparComponent(BENEPAR_LANGUAGES[language]))
        except LookupError:
            import benepar

            benepar.download(BENEPAR_LANGUAGES[language])
            nlp.add_pipe(BeneparComponent(BENEPAR_LANGUAGES[language]))
            # nlp.add_pipe(nlp.create_pipe("sentencizer"))
    return nlp
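
This helper reads two module-level lookup tables that are not part of the excerpt. A minimal sketch of what they might contain, with illustrative values (not the project's actual mappings):

# Hypothetical lookup tables assumed by _get_nlp (values are illustrative only).
LANGUAGE_TO_MODEL = {
    "en": "en_core_web_sm",   # maps a language code to a spaCy package name
    "de": "de_core_news_sm",
}

BENEPAR_LANGUAGES = {
    "en": "benepar_en2",      # benepar constituency model for English
}

# nlp = _get_nlp("en", constituencies=True)
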
Example #2
    def _load_lang_model(self) -> None:
        """Load spaCy language model.

        If a model is not installed, download it before loading it.

        Currently supported spaCy languages:

        en English (50MB)
        de German (645MB)
        fr French (1.33GB)
        es Spanish (377MB)

        :return:
        """
        if self.lang in self.languages:
            if not SpacyParser.model_installed(self.lang):
                download(self.lang)
            model = spacy.load(self.lang)
        elif self.lang in self.alpha_languages:
            language_module = importlib.import_module(
                f"spacy.lang.{self.lang}")
            language_method = getattr(language_module,
                                      self.alpha_languages[self.lang])
            model = language_method()
        self.model = model
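
The model_installed check used here (and in the similar snippets below) is defined elsewhere in the source. A minimal sketch of such a helper, assuming it only needs to know whether the model package can be imported:

import importlib.util

def model_installed(name):
    # True if a package with this name (e.g. "en_core_web_sm") is importable.
    return importlib.util.find_spec(name) is not None
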
Example #3
def get_data(tag):
  if str(os.environ.get('DOWNLOAD_MODELS', '')) == '1':
    from spacy.cli import download
    if tag == 'ner_en_core_web_sm':
      download('en_core_web_sm')
    if tag == 'xx_ent_wiki_sm':
      download('xx_ent_wiki_sm')
Example #4
def load_nlp_pipeline(language="xx"):
    if language not in language_module_registry:
        logger.error(
            "Language {} is not supported."
            "Suported languages are: {}".format(language, language_module_registry.keys())
        )
        raise ValueError
    else:
        spacy_module_name = language_module_registry[language]
    global nlp_pipelines
    if nlp_pipelines[language] is None:
        logger.info("Loading NLP pipeline")
        try:
            import spacy
        except ImportError:
            logger.error(
                " spacy is not installed. "
                "In order to install all text feature dependencies run "
                "pip install ludwig[text]"
            )
            sys.exit(-1)

        try:
            nlp_pipelines[language] = spacy.load(spacy_module_name, disable=["parser", "tagger", "ner"])
        except OSError:
            logger.info(" spaCy {} model is missing, downloading it " "(this will only happen once)")
            from spacy.cli import download

            download(spacy_module_name)
            nlp_pipelines[language] = spacy.load(spacy_module_name, disable=["parser", "tagger", "ner"])

    return nlp_pipelines[language]
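
load_nlp_pipeline relies on two module-level dictionaries defined elsewhere in the source. A rough sketch of their shape (model names here are illustrative):

# Maps a language code to the spaCy package to load (illustrative values).
language_module_registry = {
    "en": "en_core_web_sm",
    "it": "it_core_news_sm",
    "xx": "xx_ent_wiki_sm",
}

# One cached pipeline slot per supported language, filled lazily on first use.
nlp_pipelines = {language: None for language in language_module_registry}
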
Example #5
def load_nlp_pipeline(language='xx'):
    if language not in language_module_registry:
        logger.error('Language {} is not supported. '
                     'Supported languages are: {}'.format(
                         language, language_module_registry.keys()))
        raise ValueError
    else:
        spacy_module_name = language_module_registry[language]
    global nlp_pipelines
    if nlp_pipelines[language] is None:
        logger.info('Loading NLP pipeline')
        try:
            import spacy
        except ImportError:
            logger.error(
                ' spacy is not installed. '
                'In order to install all text feature dependencies run '
                'pip install ludwig[text]')
            sys.exit(-1)

        try:
            nlp_pipelines[language] = spacy.load(
                spacy_module_name, disable=['parser', 'tagger', 'ner'])
        except OSError:
            logger.info(' spaCy {} model is missing, downloading it '
                        '(this will only happen once)'.format(spacy_module_name))
            from spacy.cli import download
            download(spacy_module_name)
            nlp_pipelines[language] = spacy.load(
                spacy_module_name, disable=['parser', 'tagger', 'ner'])

    return nlp_pipelines[language]
Example #6
    def load_lang_model(self):
        """
        Load a spaCy language model, downloading it first if it is
        available but not yet installed.

        Currently supported spaCy languages:

        en English (50MB)
        de German (645MB)
        fr French (1.33GB)
        es Spanish (377MB)

        :return:
        """
        if self.lang in self.languages:
            if not Spacy.model_installed(self.lang):
                download(self.lang)
            model = spacy.load(self.lang)
        elif self.lang in self.alpha_languages:
            language_module = importlib.import_module("spacy.lang.{}".format(
                self.lang))
            language_method = getattr(language_module,
                                      self.alpha_languages[self.lang])
            model = language_method()
            """ TODO: Depending on OS (Linux/macOS) and on the sentence to be parsed,
            UnicodeDecodeError or ValueError happens at the first use when lang='ja'.
            As a workaround, the model parses some sentence before actually being used.
            """
            if self.lang == "ja":
                try:
                    model("初期化")
                except (UnicodeDecodeError, ValueError):
                    pass
        self.model = model
Example #7
    def __init__(self, index_type="mlm", model_path="bert-base-uncased", **kwargs):
        Expander.__init__(self, index_type)

        self.candidate_pos = ["NOUN", "ADJ", "ADV"]
        self.model_path = model_path

        allowed_keys = list(self.__dict__.keys())
        self.__dict__.update((k, v)
                             for k, v in kwargs.items() if k in allowed_keys)
        rejected_keys = set(kwargs.keys()) - set(allowed_keys)
        if rejected_keys:
            raise ValueError(
                "Invalid arguments in ElasticSearchRetriever constructor:{}".format(rejected_keys))

        logger.info(
            ">> loading HF model for Query Expansion from " + model_path)
        self.tokenizer = AutoTokenizer.from_pretrained(
            self.model_path, use_fast=True)
        self.model = TFBertForMaskedLM.from_pretrained(
            self.model_path, from_pt=True)
        logger.info(">> Loading Spacy NLP model ")

        try:
            self.nlp = spacy.load('en_core_web_md')
        except OSError:
            logger.info(
                "Downloading language model for the spaCy POS tagger (don't worry, this will only happen once)")
            from spacy.cli import download
            download('en_core_web_md')
            self.nlp = spacy.load('en_core_web_md')
Example #8
    def __init__(self):
        logger.info("Loading NLP model...")

        # Download model lazily if it wasn't previously installed
        download('en_core_web_lg')
        self.nlp = {"en": spacy.load("en_core_web_lg",
                                     disable=['parser', 'tagger'])}
Example #9
    def __nlp(self):
        try:
            return spacy.load(self.BASE_MODEL)
        except IOError:
            from spacy.cli import download
            download(self.BASE_MODEL)
            return spacy.load(self.BASE_MODEL)
Example #10
    def load_lang_model(self):
        """
        Load a spaCy language model, downloading it first if it is available
        but not yet installed.

        Currently supported spaCy languages:

        en English (50MB)
        de German (645MB)
        fr French (1.33GB)
        es Spanish (377MB)

        :return:
        """
        if self.lang in self.languages:
            if not Spacy.model_installed(self.lang):
                download(self.lang)
            model = spacy.load(self.lang)
        elif self.lang in self.alpha_languages:
            language_module = importlib.import_module(
                f"spacy.lang.{self.lang}")
            language_method = getattr(language_module,
                                      self.alpha_languages[self.lang])
            model = language_method()
        self.model = model
Example #11
    def run(self):
        _install.do_egg_install(self)
        import nltk
        nltk.download("stopwords")

        import spacy
        print('Downloading English language model for the spaCy POS tagger\n')
        from spacy.cli import download
        download('en_core_web_md')
Example #12
def load_spacy(model_str):
    try:
        return spacy.load(model_str)
    except Exception:
        try:
            download(model_str)
            return spacy.load(model_str)
        except Exception:
            # Fall back to a UDPipe model loaded from a local path.
            return spacy_udpipe.load_from_path(lang='cs', path=model_str)
Example #13
def load_spacy_model(language='xx', disable=[]):
    try:
        nlp = spacy.load(spacy_model_lookup[language], disable=disable)
    except OSError:
        logging.warning(
            f'[INFO] Downloading spacy language model for {language}')
        from spacy.cli import download
        download(spacy_model_lookup[language])
        nlp = spacy.load(spacy_model_lookup[language], disable=disable)
    return nlp
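
The spacy_model_lookup table is defined outside this excerpt; assuming it maps language codes to package names, a call might look like this (values are assumptions):

spacy_model_lookup = {"en": "en_core_web_sm", "xx": "xx_ent_wiki_sm"}  # illustrative

nlp = load_spacy_model("en", disable=["parser", "ner"])
doc = nlp("spaCy downloads the model on first use.")
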
Example #14
def run():
    """
    Initialize the program and its dependencies, set the Context variables required
    throughout the program (done during the splash screen for performance), and then open the first window
    :return: None
    """

    app = Application([])
    app.setWindowIcon(get_icon())
    app.setStyleSheet(Shraavani.style_sheet())

    if True:

        def file_changed(path):
            print(path)  # TODO make debugging only in development
            app.setStyleSheet("")  # reset for overlapping issues
            app.setStyleSheet(Shraavani.style_sheet())

        fs_watcher = QFileSystemWatcher(
            [asset_path('qcss/'),
             asset_path('qcss/shraavani.qcss')])
        fs_watcher.fileChanged.connect(file_changed)

    with SplashScreen(app, asset_path('logo-splash.jpg')) as sp:
        sp.status_update('Initializing ...')
        Context.app = app
        time.sleep(2)

        sp.status_update('Loading DataBase and settings ...')
        Context.db = SqliteDatabase('vaaya.sqlite')
        Context.db.connect(reuse_if_open=True)
        from vaaya.gui.models import DMoods, JrnEntry
        Context.db.create_tables([DMoods, JrnEntry])
        time.sleep(1)

        sp.status_update('Loading module Spacy ...')
        try:
            Context.nlp = spacy.load('en_core_web_lg')
        except IOError:
            sp.status_update(
                'Downloading Spacy dependencies (est. 826 mb in size)')
            from spacy.cli import download
            download('en_core_web_lg')
            Context.nlp = spacy.load('en_core_web_lg')

        from vaaya.gui.activities import MoodMainActivity, MoodEntry, MoodAnalytics, MoodData
        # Different GUI's for use
        sp.set_after(
            MoodMainActivity([MoodEntry(),
                              MoodAnalytics(),
                              MoodData()]))  # move me if needed

    app.exec_()
Example #15
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        import spacy
        from spacy.cli import download
        env_dir = ContribLoader._env_dir.resolve()
        download('en_core_web_sm', False,
                 "--install-option=--prefix=%s" % env_dir)
        import en_core_web_sm
        self.nlp = en_core_web_sm.load()
        self.ne_types = {
            "PERSON", "ORG", "GPE", "LOC", "PRODUCT", "EVENT", "DATE"
        }
Example #16
    def __call__(self, args):
        from spacy.cli import download

        if args.language:
            try:
                download(languages[args.language])
            except Exception:
                print("We couldn't find your choice in our database")

        if args.list:
            print('You can choose a language from the list below')
            print(', '.join(list(languages.keys())))
Example #17
def setup():
    try:
        import nltk
        nltk.download(['wordnet', 'wordnet_ic'])
    except ImportError:
        pass

    try:
        from spacy.cli import download
        download('en')
    except ImportError:
        pass
Example #18
def download_models(languages=None, debug=False):
    logging.basicConfig(
        format="%(name)s - %(levelname)s - %(message)s",
        level=logging.DEBUG if debug else logging.INFO,
    )

    if not languages:
        languages = config("SUPPORTED_LANGUAGES", default="", cast=str)
    languages = cast_supported_languages(languages)

    for lang in languages:
        lang = lang.split("-")

        lang_slug = lang[0]
        model = lang[1] if len(lang) > 1 else None
        value = lang_to_model.get(lang_slug, {}).get(model, None)

        if model == "SPACY":
            if value.startswith("pip+"):
                model_name, pip_package = value[4:].split(":", 1)
                logger.debug("model name: {}".format(model_name))
                logger.debug("pip package: {}".format(pip_package))
                cmd = [
                    sys.executable,
                    "-m",
                    "pip",
                    "install",
                    "--no-deps",
                    "--no-cache-dir",
                    pip_package,
                ]
                logger.debug(" ".join(cmd))
                if subprocess.call(cmd, env=os.environ.copy()) == 0:
                    logger.debug("linking: {} to {}".format(
                        model_name, lang_slug))
                    package_path = get_package_path(model_name)
                    link(model_name,
                         lang_slug,
                         force=True,
                         model_path=package_path)
                else:
                    raise Exception("Error to download {}".format(lang_slug))
            elif lang_slug != value:
                logger.debug("downloading {}".format(value))
                download(value)
                logger.debug("linking: {} to {}".format(value, lang_slug))
                package_path = get_package_path(value)
                link(value, lang_slug, force=True, model_path=package_path)
            else:
                logger.debug("downloading {}".format(value))
                download(value)
        elif model == "BERT":
            download_bert(value)
Example #19
def model_build(model_name="fr_core_news_sm"):
    """
    :param model_name: name of the spaCy model to use
    :return: the loaded model object
    """
    try:
        nlp = spacy.load(model_name, disable=['ner'])
    except OSError:
        from spacy.cli import download
        download(model_name)
        nlp = spacy.load(model_name, disable=['ner'])
    return nlp
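
A short usage sketch for this helper (the input text is illustrative):

nlp = model_build()          # loads fr_core_news_sm, downloading it if needed
doc = nlp("Ceci est une phrase d'exemple.")
print([(token.text, token.pos_) for token in doc])
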
Example #20
def load_spacy():
    try:
        ENCORE = spacy.load('en_core_web_sm')
    except OSError:  # pragma: nocover
        from spacy.cli import download
        download('en_core_web_sm')
        ENCORE = spacy.load('en_core_web_sm')

    try:
        ENCORE_LG = spacy.load('en_core_web_lg')
    except OSError:  # pragma: nocover
        from spacy.cli import download
        download('en_core_web_lg')
        ENCORE_LG = spacy.load('en_core_web_lg')

    return ENCORE, ENCORE_LG
Example #21
    def __init__(self, **data: Any):
        super().__init__(**data)
        try:
            self._nlp = spacy.load(
                self.model_name_or_path,
                disable=["parser", "ner"],
            )
        except OSError:
            download(self.model_name_or_path)
            self._nlp = spacy.load(
                self.model_name_or_path,
                disable=["parser", "ner"],
            )
Example #22
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        import spacy
        env_dir = ContribLoader._env_dir.resolve()
        lock_file = os.path.join(env_dir, "spacy.lock")
        try:
            with filelock.FileLock(lock_file):
                from spacy.cli import download
                download('en_core_web_sm', False,
                         "--install-option=--prefix=%s" % env_dir)
                self.nlp = spacy.load('en_core_web_sm')
        finally:
            remove(lock_file)
        self.ne_types = {"PERSON", "ORG", "GPE", "LOC", "PRODUCT", "EVENT", "DATE"}
Example #23
    def __init__(self, **data: Any):
        super().__init__(**data)

        if not self.engine_config:
            self.engine_config = PresidioEngineConfig()

        if not self.engine_config.models or len(
                self.engine_config.models) == 0:
            self.engine_config.models = [PresidioModelConfig()]

        # If spacy engine then load Spacy models and select languages
        languages = []
        for model_config in self.engine_config.models:
            languages.append(model_config.lang_code)

            # Check SpacyNlpEngine.engine_name
            if (self.engine_config.nlp_engine_name == "spacy"
                    and model_config.model_name is not None):
                try:
                    spacy_model = __import__(model_config.model_name)
                    spacy_model.load()
                    logger.info(
                        f"Spacy model {model_config.model_name} is already downloaded"
                    )
                except Exception:
                    logger.warning(
                        f"Spacy model {model_config.model_name} is not downloaded"
                    )
                    logger.warning(
                        f"Downloading spacy model {model_config.model_name}, it might take some time"
                    )
                    from spacy.cli import download

                    download(model_config.model_name)

        # Create NLP engine based on configuration
        provider = NlpEngineProvider(
            nlp_configuration=self.engine_config.dict())
        nlp_engine = provider.create_engine()

        # Pass the created NLP engine and supported_languages to the AnalyzerEngine
        self._analyzer = AnalyzerEngine(nlp_engine=nlp_engine,
                                        supported_languages=languages)

        # self._analyzer.registry.load_predefined_recognizers()
        if self.entity_recognizers:
            for entity_recognizer in self.entity_recognizers:
                self._analyzer.registry.add_recognizer(entity_recognizer)

        # Initialize the anonymizer with logger
        self._anonymizer = AnonymizerEngine()
Example #24
def load_spacy_model(model_name):
    """
    Load a spaCy model. If the model isn't installed, download it first.
    :param model_name: the name of the spaCy model
    :return: the loaded model
    """
    try:
        nlp = spacy.load(model_name)
    except OSError:
        print(f"Model {model_name} wasn't found. Downloading now...")
        download(model_name)
        nlp = spacy.load(model_name)

    return nlp
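
A short usage sketch (the model name is chosen for illustration):

nlp = load_spacy_model("en_core_web_sm")
doc = nlp("The model is downloaded automatically when missing.")
print([(token.text, token.pos_) for token in doc])
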
Example #25
def run(serve_forever=True):
    download("en")
    download("de")
    interpreter = RasaNLUInterpreter("models/current/nlu")
    bot = server.create_app("models/dialogue", interpreter=interpreter)

    logger.info("Started http server on port %s" % '8080')

    http_server = WSGIServer(('0.0.0.0', 8080), bot)
    logger.info("Up and running")
    try:
        http_server.serve_forever()
    except Exception as exc:
        logger.exception(exc)

    return bot
Example #26
def spacy_model(model: str = 'en_core_web_md') -> None:
    """
    Download spaCy model.

    Parameters
    ----------
    model
        Model to be downloaded
    """
    try:
        spacy.load(model)
    except OSError:
        download(model)

        # https://github.com/explosion/spaCy/issues/3435
        package_path = get_package_path(model)
        link(model, model, force=True, model_path=package_path)
Example #27
def download_models():
    languages = os.getenv("languages", "en_core_web_md").split()
    for lang in languages:
        download(model=lang, direct=False)

    print("Updating frontend settings...")
    frontend_settings = json.load(open("frontend/_data.json"))

    frontend_settings['index']['languages'] = {l: l for l in languages}
    frontend_settings['index']['default_language'] = languages[0]

    json.dump(frontend_settings,
              open("frontend/_data.json", "w"),
              sort_keys=True,
              indent=2)

    print("Done!")
Example #28
    def setUpClass(cls) -> None:
        """
        Set up the tests environment
        """
        download('es_core_news_md')

        container.reset()

        cls.nlp_service_mock = Mock(spec=NlpService)
        cls.summarizer_mock = Mock(spec=SummaryService)
        cls.sentiment_analyzer_mock = Mock(spec=SentimentAnalysisService)
        cls.exchange_publisher_mock = MagicMock()

        container.set('nlp_service', cls.nlp_service_mock)
        container.set('summary_service', cls.summarizer_mock)
        container.set('sentiment_analysis_service',
                      cls.sentiment_analyzer_mock)
        container.set('exchange_publisher', cls.exchange_publisher_mock)
Example #29
    def load_lang_model(lang):
        """
        Load a spaCy language model, downloading it first if it is
        available but not yet installed.

        Currently supported spaCy languages:

        en English (50MB)
        de German (645MB)
        fr French (1.33GB)
        es Spanish (377MB)

        :param lang:
        :return:
        """
        if not Spacy.model_installed(lang):
            download(lang)
        return spacy.load(lang)
Example #30
    def load_lang_model(lang):
        '''
        Load a spaCy language model, downloading it first if it is
        available but not yet installed.

        Currently supported spaCy languages:

        en English (50MB)
        de German (645MB)
        fr French (1.33GB)
        es Spanish (377MB)

        :param lang:
        :return:
        '''
        if not Spacy.model_installed(lang):
            download(lang)
        return spacy.load(lang)
Example #31
    def run(self):
        _install.run(self)
        import nltk
        nltk.download("stopwords")
        nltk.download("punkt")

        import spacy
        import sys

        try:
            nlp = spacy.load('en')
        except OSError:
            print(
                'Downloading language model for spaCy\n'
                "(don't worry, this will only happen once)",
                file=sys.stderr)
            from spacy.cli import download
            download('en')
            nlp = spacy.load('en')