def _get_nlp(language="en", constituencies=False):
    """Return a spaCy pipeline for *language*, downloading the model on first use.

    When *constituencies* is true and the language has a benepar model,
    the benepar constituency-parsing component is added to the pipeline.
    """
    import spacy

    language = language.lower()
    # Fall back to the raw language code when no explicit mapping exists.
    model_name = LANGUAGE_TO_MODEL.get(language, language)
    try:
        nlp = spacy.load(model_name)
    except OSError:
        # Model package not installed yet: fetch it once, then load again.
        from spacy.cli import download
        download(model_name)
        nlp = spacy.load(model_name)

    if constituencies and language in BENEPAR_LANGUAGES:
        from benepar.spacy_plugin import BeneparComponent

        benepar_model = BENEPAR_LANGUAGES[language]
        try:
            nlp.add_pipe(BeneparComponent(benepar_model))
        except LookupError:
            # Benepar weights missing: download, then retry adding the component.
            import benepar
            benepar.download(benepar_model)
            nlp.add_pipe(BeneparComponent(benepar_model))
    return nlp
def _load_lang_model(self) -> None:
    """Load the spaCy language model for ``self.lang`` into ``self.model``.

    If a full model is not installed, download it before loading it.

    Currenty supported spaCy languages

    en English (50MB)
    de German (645MB)
    fr French (1.33GB)
    es Spanish (377MB)

    :raises ValueError: if ``self.lang`` is in neither ``self.languages``
        nor ``self.alpha_languages``.
    :return:
    """
    if self.lang in self.languages:
        if not SpacyParser.model_installed(self.lang):
            download(self.lang)
        model = spacy.load(self.lang)
    elif self.lang in self.alpha_languages:
        # Languages without a trained model: build a blank Language object
        # from the spacy.lang.* module.
        language_module = importlib.import_module(
            f"spacy.lang.{self.lang}")
        language_method = getattr(language_module,
                                  self.alpha_languages[self.lang])
        model = language_method()
    else:
        # BUG FIX: the original fell through here and crashed with an
        # UnboundLocalError on `model`; fail with a clear error instead.
        raise ValueError(f"Language {self.lang!r} is not supported")
    self.model = model
def get_data(tag):
    """Download the spaCy model matching *tag*, but only when the
    DOWNLOAD_MODELS environment variable is set to '1'."""
    if str(os.environ.get('DOWNLOAD_MODELS', '')) != '1':
        return
    from spacy.cli import download

    # Map data tags to the spaCy package each one requires.
    model = {
        'ner_en_core_web_sm': 'en_core_web_sm',
        'xx_ent_wiki_sm': 'xx_ent_wiki_sm',
    }.get(tag)
    if model is not None:
        download(model)
def load_nlp_pipeline(language="xx"):
    """Return a cached spaCy pipeline for *language*, loading (and, on first
    use, downloading) it with parser/tagger/ner disabled.

    :param language: language key into ``language_module_registry``
    :raises ValueError: if the language is not supported
    """
    if language not in language_module_registry:
        logger.error(
            "Language {} is not supported."
            "Suported languages are: {}".format(language, language_module_registry.keys())
        )
        # BUG FIX: raise with a message instead of a bare ValueError class.
        raise ValueError("Language {} is not supported".format(language))
    spacy_module_name = language_module_registry[language]
    global nlp_pipelines
    if nlp_pipelines[language] is None:
        logger.info("Loading NLP pipeline")
        try:
            import spacy
        except ImportError:
            logger.error(
                " spacy is not installed. "
                "In order to install all text feature dependencies run "
                "pip install ludwig[text]"
            )
            sys.exit(-1)
        try:
            nlp_pipelines[language] = spacy.load(spacy_module_name,
                                                 disable=["parser", "tagger", "ner"])
        except OSError:
            # BUG FIX: the original left the {} placeholder unformatted;
            # use lazy %-style args so the model name actually appears.
            logger.info(" spaCy %s model is missing, downloading it "
                        "(this will only happen once)", spacy_module_name)
            from spacy.cli import download
            download(spacy_module_name)
            nlp_pipelines[language] = spacy.load(spacy_module_name,
                                                 disable=["parser", "tagger", "ner"])
    return nlp_pipelines[language]
def load_nlp_pipeline(language='xx'):
    """Return a cached spaCy pipeline for *language*, loading (and, on first
    use, downloading) it with parser/tagger/ner disabled.

    :param language: language key into ``language_module_registry``
    :raises ValueError: if the language is not supported
    """
    if language not in language_module_registry:
        logger.error('Language {} is not supported.'
                     'Suported languages are: {}'.format(
                         language, language_module_registry.keys()))
        # BUG FIX: raise with a message instead of a bare ValueError class.
        raise ValueError('Language {} is not supported'.format(language))
    spacy_module_name = language_module_registry[language]
    global nlp_pipelines
    if nlp_pipelines[language] is None:
        logger.info('Loading NLP pipeline')
        try:
            import spacy
        except ImportError:
            logger.error(
                ' spacy is not installed. '
                'In order to install all text feature dependencies run '
                'pip install ludwig[text]')
            sys.exit(-1)
        try:
            nlp_pipelines[language] = spacy.load(
                spacy_module_name, disable=['parser', 'tagger', 'ner'])
        except OSError:
            # BUG FIX: the original left the {} placeholder unformatted;
            # use lazy %-style args so the model name actually appears.
            logger.info(' spaCy %s model is missing, downloading it '
                        '(this will only happen once)', spacy_module_name)
            from spacy.cli import download
            download(spacy_module_name)
            nlp_pipelines[language] = spacy.load(
                spacy_module_name, disable=['parser', 'tagger', 'ner'])
    return nlp_pipelines[language]
def load_lang_model(self):
    """Load the spaCy language model for ``self.lang`` into ``self.model``,
    downloading it first if it is available but not installed.

    Currenty supported spaCy languages

    en English (50MB)
    de German (645MB)
    fr French (1.33GB)
    es Spanish (377MB)

    :raises ValueError: if ``self.lang`` is in neither ``self.languages``
        nor ``self.alpha_languages``.
    :return:
    """
    if self.lang in self.languages:
        if not Spacy.model_installed(self.lang):
            download(self.lang)
        model = spacy.load(self.lang)
    elif self.lang in self.alpha_languages:
        # Languages without a trained model: build a blank Language object.
        language_module = importlib.import_module("spacy.lang.{}".format(
            self.lang))
        language_method = getattr(language_module,
                                  self.alpha_languages[self.lang])
        model = language_method()
    else:
        # BUG FIX: the original fell through here and crashed with an
        # UnboundLocalError on `model`; fail with a clear error instead.
        raise ValueError("Language {} is not supported".format(self.lang))
    # TODO: Depending on OS (Linux/macOS) and on the sentence to be parsed,
    # UnicodeDecodeError or ValueError happens at the first use when
    # lang='ja'. As a workaround, the model parses some sentence before
    # actually being used.  (Was a stray triple-quoted string statement.)
    if self.lang == "ja":
        try:
            model("初期化")
        except (UnicodeDecodeError, ValueError):
            pass
    self.model = model
def __init__(self, index_type="mlm", model_path="bert-base-uncased", **kwargs):
    """Build a query-expansion component backed by a masked-LM model and a
    spaCy POS tagger; unknown keyword arguments are rejected."""
    Expander.__init__(self, index_type)
    self.candidate_pos = ["NOUN", "ADJ", "ADV"]
    self.model_path = model_path

    # Only attributes that already exist on the instance may be overridden.
    allowed_keys = list(self.__dict__.keys())
    for key, value in kwargs.items():
        if key in allowed_keys:
            self.__dict__[key] = value
    rejected_keys = set(kwargs.keys()) - set(allowed_keys)
    if rejected_keys:
        raise ValueError(
            "Invalid arguments in ElasticSearchRetriever constructor:{}".format(rejected_keys))

    logger.info(
        ">> loading HF model for Query Expansion from " + model_path)
    self.tokenizer = AutoTokenizer.from_pretrained(
        self.model_path, use_fast=True)
    self.model = TFBertForMaskedLM.from_pretrained(
        self.model_path, from_pt=True)

    logger.info(">> Loading Spacy NLP model ")
    try:
        self.nlp = spacy.load('en_core_web_md')
    except OSError:
        # First run: fetch the POS-tagger model, then load it.
        logger.info(
            "Downloading language model for the spaCy POS tagger (don't worry, this will only happen once)")
        from spacy.cli import download
        download('en_core_web_md')
        self.nlp = spacy.load('en_core_web_md')
def __init__(self):
    """Load the large English spaCy model, downloading it only when missing."""
    logger.info("Loading NLP model...")
    # BUG FIX: the comment promised a lazy download, but download() ran
    # unconditionally on every startup. Try loading first and only fetch
    # the model when it is actually absent.
    try:
        nlp = spacy.load("en_core_web_lg", disable=['parser', 'tagger'])
    except OSError:
        download('en_core_web_lg')
        nlp = spacy.load("en_core_web_lg", disable=['parser', 'tagger'])
    self.nlp = {"en": nlp}
def __nlp(self):
    """Return the loaded base spaCy model, downloading it on first use."""
    try:
        return spacy.load(self.BASE_MODEL)
    except IOError:
        # Model package not installed yet: fetch it, then load again.
        from spacy.cli import download
        download(self.BASE_MODEL)
        return spacy.load(self.BASE_MODEL)
def load_lang_model(self):
    """Load the spaCy language model for ``self.lang`` into ``self.model``,
    downloading it first if it is available but not installed.

    Currenty supported spaCy languages

    en English (50MB)
    de German (645MB)
    fr French (1.33GB)
    es Spanish (377MB)

    :raises ValueError: if ``self.lang`` is in neither ``self.languages``
        nor ``self.alpha_languages``.
    :return:
    """
    if self.lang in self.languages:
        if not Spacy.model_installed(self.lang):
            download(self.lang)
        model = spacy.load(self.lang)
    elif self.lang in self.alpha_languages:
        # Languages without a trained model: build a blank Language object.
        language_module = importlib.import_module(
            f"spacy.lang.{self.lang}")
        language_method = getattr(language_module,
                                  self.alpha_languages[self.lang])
        model = language_method()
    else:
        # BUG FIX: the original fell through here and crashed with an
        # UnboundLocalError on `model`; fail with a clear error instead.
        raise ValueError(f"Language {self.lang!r} is not supported")
    self.model = model
def run(self):
    """Custom install step: perform the egg install, then fetch the NLTK
    stopwords corpus and the spaCy English POS-tagger model."""
    _install.do_egg_install(self)

    import nltk
    nltk.download("stopwords")

    import spacy
    print('Downloading english language model for the spaCy POS tagger\n')
    from spacy.cli import download
    download('en_core_web_md')
def load_spacy(model_str):
    """Load a spaCy model by package name, downloading it when missing;
    if that also fails, treat *model_str* as a filesystem path to a Czech
    UDPipe model and load it via spacy_udpipe.

    :param model_str: spaCy model name or UDPipe model path
    :return: the loaded pipeline
    """
    try:
        return spacy.load(model_str)
    except OSError:
        # BUG FIX: was a bare `except:`; spacy.load signals a missing
        # model with OSError.
        try:
            download(model_str)
            return spacy.load(model_str)
        except Exception:
            # BUG FIX: was a bare `except:` — keep the fallback broad but
            # let SystemExit/KeyboardInterrupt propagate.
            return spacy_udpipe.load_from_path(lang='cs', path=model_str)
def load_spacy_model(language='xx', disable=None):
    """Load the spaCy model registered for *language*, downloading it on
    first use.

    :param language: key into ``spacy_model_lookup``
    :param disable: pipeline components to disable (defaults to none)
    :return: the loaded spaCy pipeline
    """
    # BUG FIX: the default was a shared mutable list (`disable=[]`);
    # use None as the sentinel instead.
    if disable is None:
        disable = []
    try:
        nlp = spacy.load(spacy_model_lookup[language], disable=disable)
    except OSError:
        logging.warning(
            f'[INFO] Downloading spacy language model for {language}')
        from spacy.cli import download
        download(spacy_model_lookup[language])
        nlp = spacy.load(spacy_model_lookup[language], disable=disable)
    return nlp
def run():
    """
    Initialize the program and its dependencies, set the Context variables
    required throughout the program while the splash screen is showing,
    then open the first window.

    :return: None
    """
    app = Application([])
    app.setWindowIcon(get_icon())
    app.setStyleSheet(Shraavani.style_sheet())
    # NOTE(review): `if True:` looks like a development toggle for live
    # stylesheet reloading — confirm whether it should be a config flag.
    if True:
        def file_changed(path):
            # Reload the stylesheet whenever a watched qcss file changes.
            print(path)  # TODO make debugging only in development
            app.setStyleSheet("")  # reset for overlapping issues
            app.setStyleSheet(Shraavani.style_sheet())
        fs_watcher = QFileSystemWatcher(
            [asset_path('qcss/'), asset_path('qcss/shraavani.qcss')])
        fs_watcher.fileChanged.connect(file_changed)
    with SplashScreen(app, asset_path('logo-splash.jpg')) as sp:
        sp.status_update('Initializing ...')
        Context.app = app
        time.sleep(2)
        sp.status_update('Loading DataBase and settings ...')
        Context.db = SqliteDatabase('vaaya.sqlite')
        Context.db.connect(reuse_if_open=True)
        from vaaya.gui.models import DMoods, JrnEntry
        Context.db.create_tables([DMoods, JrnEntry])
        time.sleep(1)
        sp.status_update('Loading module Spacy ...')
        # Load the large English model; on first run fetch it (large download).
        try:
            Context.nlp = spacy.load('en_core_web_lg')
        except IOError:
            sp.status_update(
                'Downloading Spacy dependencies (est. 826 mb in size)')
            from spacy.cli import download
            download('en_core_web_lg')
            Context.nlp = spacy.load('en_core_web_lg')
        from vaaya.gui.activities import MoodMainActivity, MoodEntry, MoodAnalytics, MoodData  # Different GUI's for use
        sp.set_after(
            MoodMainActivity([MoodEntry(), MoodAnalytics(), MoodData()]))  # move me if needed
    app.exec_()
def __init__(self, **kwargs):
    """Install the small English spaCy model into the contrib environment
    prefix, load it, and record the named-entity types this loader keeps."""
    super().__init__(**kwargs)
    import spacy
    from spacy.cli import download
    env_dir = ContribLoader._env_dir.resolve()
    # NOTE(review): pip removed "--install-option" in pip 23.1; this
    # prefix-install trick may no longer work — confirm against the pip
    # version available at runtime.
    download('en_core_web_sm', False, "--install-option=--prefix=%s" % env_dir)
    import en_core_web_sm
    self.nlp = en_core_web_sm.load()
    # Entity labels retained by this component.
    self.ne_types = {
        "PERSON", "ORG", "GPE", "LOC", "PRODUCT", "EVENT", "DATE"
    }
def __call__(self, args):
    """CLI entry point: download the spaCy model for the requested language,
    and/or list the supported language keys.

    :param args: parsed arguments with ``language`` and ``list`` attributes
    """
    from spacy.cli import download
    if args.language:
        try:
            download(languages[args.language])
        except Exception:
            # BUG FIX: was a bare `except:`, which also swallowed
            # SystemExit/KeyboardInterrupt. An unknown key raises KeyError;
            # download failures are reported the same way.
            print('We dont find your choice in our database')
    if args.list:
        print('You can choose the language that is in list')
        print(', '.join(languages.keys()))
def setup():
    """Best-effort download of optional NLP resources: the NLTK wordnet
    corpora and the spaCy 'en' model. Libraries that are not installed
    are silently skipped."""
    def _fetch_nltk():
        import nltk
        nltk.download(['wordnet', 'wordnet_ic'])

    def _fetch_spacy():
        from spacy.cli import download
        download('en')

    for fetch in (_fetch_nltk, _fetch_spacy):
        try:
            fetch()
        except ImportError:
            # The library is an optional dependency; skip it quietly.
            pass
def download_models(languages=None, debug=False):
    """Download the models for the configured languages.

    Each entry is "<lang_slug>" or "<lang_slug>-<MODEL>" where MODEL is
    SPACY or BERT; the concrete artifact comes from ``lang_to_model``.
    """
    logging.basicConfig(
        format="%(name)s - %(levelname)s - %(message)s",
        level=logging.DEBUG if debug else logging.INFO,
    )
    if not languages:
        # Fall back to the SUPPORTED_LANGUAGES environment configuration.
        languages = config("SUPPORTED_LANGUAGES", default="", cast=str)
        languages = cast_supported_languages(languages)
    for lang in languages:
        lang = lang.split("-")
        lang_slug = lang[0]
        model = lang[1] if len(lang) > 1 else None
        # NOTE(review): `value` is None when lang_slug/model is missing from
        # lang_to_model; value.startswith below would then raise
        # AttributeError — confirm the registry always covers SPACY entries.
        value = lang_to_model.get(lang_slug, {}).get(model, None)
        if model == "SPACY":
            if value.startswith("pip+"):
                # "pip+<model_name>:<pip_package>" — install via pip, then
                # link the installed package under the language slug.
                model_name, pip_package = value[4:].split(":", 1)
                logger.debug("model name: {}".format(model_name))
                logger.debug("pip package: {}".format(pip_package))
                cmd = [
                    sys.executable,
                    "-m",
                    "pip",
                    "install",
                    "--no-deps",
                    "--no-cache-dir",
                    pip_package,
                ]
                logger.debug(" ".join(cmd))
                if subprocess.call(cmd, env=os.environ.copy()) == 0:
                    logger.debug("linking: {} to {}".format(
                        model_name, lang_slug))
                    package_path = get_package_path(model_name)
                    link(model_name,
                         lang_slug,
                         force=True,
                         model_path=package_path)
                else:
                    raise Exception("Error to download {}".format(lang_slug))
            elif lang_slug != value:
                # Model name differs from the slug: download, then link
                # so the slug resolves to the model.
                logger.debug("downloading {}".format(value))
                download(value)
                logger.debug("linking: {} to {}".format(value, lang_slug))
                package_path = get_package_path(value)
                link(value, lang_slug, force=True, model_path=package_path)
            else:
                logger.debug("downloading {}".format(value))
                download(value)
        elif model == "BERT":
            download_bert(value)
def model_build(model_name="fr_core_news_sm"):
    """Build a spaCy pipeline with NER disabled, downloading the model on
    first use.

    :param model_name: name of the spaCy model to use
    :return: the loaded model object
    """
    try:
        nlp = spacy.load(model_name, disable=['ner'])
    except OSError:
        # BUG FIX: was a bare `except:`; spacy.load signals a missing
        # model with OSError.
        from spacy.cli import download
        download(model_name)
        nlp = spacy.load(model_name, disable=['ner'])
    return nlp
def load_spacy():
    """Ensure the small and large English spaCy models are installed and
    loadable, downloading each on first failure.

    NOTE(review): both models are bound to locals that are discarded when
    the function returns — presumably these were module-level globals
    originally; confirm against the callers.
    """
    try:
        ENCORE = spacy.load('en_core_web_sm')
    except OSError:  # pragma: nocover
        from spacy.cli import download
        # BUG FIX: the original downloaded the 'en' shortcut and never
        # retried the load, leaving ENCORE unbound; download the actual
        # package and load it afterwards.
        download('en_core_web_sm')
        ENCORE = spacy.load('en_core_web_sm')
    try:
        ENCORE_LG = spacy.load('en_core_web_lg')
    except OSError:  # pragma: nocover
        from spacy.cli import download
        download('en_core_web_lg')
        # BUG FIX: retry the load after downloading (the original stopped
        # after the download, leaving ENCORE_LG unbound).
        ENCORE_LG = spacy.load('en_core_web_lg')
def __init__(self, **data: Any):
    """Initialize the component and load its spaCy pipeline with parser
    and NER disabled, downloading the model on first use."""
    super().__init__(**data)
    try:
        self._nlp = spacy.load(
            self.model_name_or_path,
            disable=["parser", "ner"],
        )
    except OSError:
        # BUG FIX: was a bare `except:`; a missing model raises OSError,
        # and broader failures should propagate.
        download(self.model_name_or_path)
        self._nlp = spacy.load(
            self.model_name_or_path,
            disable=["parser", "ner"],
        )
def __init__(self, **kwargs):
    """Install (under a file lock) and load the small English spaCy model,
    and record the named-entity types this loader keeps."""
    super().__init__(**kwargs)
    import spacy
    env_dir = ContribLoader._env_dir.resolve()
    lock_file = os.path.join(env_dir, "spacy.lock")
    try:
        # Serialize concurrent model installs across processes.
        with filelock.FileLock(lock_file):
            from spacy.cli import download
            # NOTE(review): pip removed "--install-option" in pip 23.1;
            # confirm this prefix install still works with the pinned pip.
            download('en_core_web_sm', False,
                     "--install-option=--prefix=%s" % ContribLoader._env_dir.resolve())
            self.nlp = spacy.load('en_core_web_sm')
    finally:
        # NOTE(review): removing the lock file here can race with another
        # process still holding the lock — verify this is intended.
        remove(lock_file)
    # Entity labels retained by this component.
    self.ne_types = {"PERSON", "ORG", "GPE",
                     "LOC", "PRODUCT", "EVENT", "DATE"}
def __init__(self, **data: Any):
    """Initialize the Presidio analyzer/anonymizer pair from the engine
    configuration, downloading any missing spaCy models and registering
    custom entity recognizers."""
    super().__init__(**data)
    if not self.engine_config:
        self.engine_config = PresidioEngineConfig()
    if not self.engine_config.models or len(
            self.engine_config.models) == 0:
        self.engine_config.models = [PresidioModelConfig()]

    # If spacy engine then load Spacy models and select languages
    languages = []
    for model_config in self.engine_config.models:
        languages.append(model_config.lang_code)
        # Check SpacyNlpEngine.engine_name
        if (self.engine_config.nlp_engine_name == "spacy"
                and model_config.model_name is not None):
            try:
                # Importing the model package proves it is installed.
                spacy_model = __import__(model_config.model_name)
                spacy_model.load()
                logger.info(
                    f"Spacy model {model_config.model_name} is already downloaded"
                )
            except Exception:
                # BUG FIX: was a bare `except:`, which also swallowed
                # SystemExit/KeyboardInterrupt.
                logger.warning(
                    f"Spacy model {model_config.model_name} is not downloaded"
                )
                logger.warning(
                    f"Downloading spacy model {model_config.model_name}, it might take some time"
                )
                from spacy.cli import download
                download(model_config.model_name)

    # Create NLP engine based on configuration
    provider = NlpEngineProvider(
        nlp_configuration=self.engine_config.dict())
    nlp_engine = provider.create_engine()

    # Pass the created NLP engine and supported_languages to the AnalyzerEngine
    self._analyzer = AnalyzerEngine(nlp_engine=nlp_engine,
                                    supported_languages=languages)
    if self.entity_recognizers:
        for entity_recognizer in self.entity_recognizers:
            self._analyzer.registry.add_recognizer(entity_recognizer)

    # Initialize the anonymizer with logger
    self._anonymizer = AnonymizerEngine()
def load_spacy_model(model_name):
    """Load a SpaCy model by name; if it is not installed, download it
    first and then load it.

    :param model_name: the name of the SpaCy model
    :return: the loaded model
    """
    try:
        return spacy.load(model_name)
    except OSError:
        print(f"Model {model_name} wasn't found. Downloading now...")
        download(model_name)
        return spacy.load(model_name)
def run(serve_forever=True):
    """Download the spaCy models, wire up the NLU interpreter and dialogue
    app, and serve it over HTTP on port 8080 until interrupted.

    :return: the created bot application
    """
    for model in ("en", "de"):
        download(model)
    interpreter = RasaNLUInterpreter("models/current/nlu")
    bot = server.create_app("models/dialogue", interpreter=interpreter)
    logger.info("Started http server on port %s" % '8080')
    http_server = WSGIServer(('0.0.0.0', 8080), bot)
    logger.info("Up and running")
    try:
        http_server.serve_forever()
    except Exception as exc:
        # Log the failure but still hand the bot back to the caller.
        logger.exception(exc)
    return bot
def spacy_model(model: str = 'en_core_web_md') -> None:
    """
    Download spaCy model.

    Parameters
    ----------
    model
        Model to be downloaded
    """
    try:
        spacy.load(model)
    except OSError:
        # Not installed yet: fetch the package, then create a shortcut
        # link so the model name resolves.
        download(model)
        # https://github.com/explosion/spaCy/issues/3435
        link(model, model, force=True,
             model_path=get_package_path(model))
def download_models():
    """Download the spaCy models named in the ``languages`` environment
    variable (space-separated, default en_core_web_md) and update the
    frontend language settings accordingly."""
    languages = os.getenv("languages", "en_core_web_md").split()
    for lang in languages:
        download(model=lang, direct=False)
    print("Updating frontend settings...")
    # BUG FIX: json.load(open(...)) / json.dump(..., open(...)) leaked the
    # file handles; use context managers so they are closed deterministically.
    with open("frontend/_data.json") as settings_file:
        frontend_settings = json.load(settings_file)
    frontend_settings['index']['languages'] = {l: l for l in languages}
    frontend_settings['index']['default_language'] = languages[0]
    with open("frontend/_data.json", "w") as settings_file:
        json.dump(frontend_settings, settings_file, sort_keys=True, indent=2)
    print("Done!")
def setUpClass(cls) -> None:
    """Set up the tests environment: fetch the Spanish spaCy model, reset
    the service container, and register mocks for every service."""
    download('es_core_news_md')
    container.reset()
    cls.nlp_service_mock = Mock(spec=NlpService)
    cls.summarizer_mock = Mock(spec=SummaryService)
    cls.sentiment_analyzer_mock = Mock(spec=SentimentAnalysisService)
    cls.exchange_publisher_mock = MagicMock()
    # Register every mock under its container key (insertion order kept).
    registrations = {
        'nlp_service': cls.nlp_service_mock,
        'summary_service': cls.summarizer_mock,
        'sentiment_analysis_service': cls.sentiment_analyzer_mock,
        'exchange_publisher': cls.exchange_publisher_mock,
    }
    for service_name, mock in registrations.items():
        container.set(service_name, mock)
def load_lang_model(lang):
    """Return the spaCy model for *lang*, downloading it first when it is
    available but not installed.

    Currenty supported spaCy languages

    en English (50MB)
    de German (645MB)
    fr French (1.33GB)
    es Spanish (377MB)

    :param lang:
    :return:
    """
    model_missing = not Spacy.model_installed(lang)
    if model_missing:
        download(lang)
    return spacy.load(lang)
def load_lang_model(lang):
    '''
    Fetch (when not yet installed) and load the spaCy model for *lang*.

    Currenty supported spaCy languages

    en English (50MB)
    de German (645MB)
    fr French (1.33GB)
    es Spanish (377MB)

    :param lang:
    :return:
    '''
    installed = Spacy.model_installed(lang)
    if not installed:
        download(lang)
    return spacy.load(lang)
def run(self):
    """Post-install hook: run the standard install, then fetch the NLTK
    corpora and the spaCy English model needed at runtime."""
    _install.run(self)

    import nltk
    for corpus in ("stopwords", "punkt"):
        nltk.download(corpus)

    import spacy
    import sys
    try:
        nlp = spacy.load('en')
    except OSError:
        # First install on this machine: fetch the model, then load it.
        print(
            'Downloading language model for spaCy\n'
            "(don't worry, this will only happen once)",
            file=sys.stderr)
        from spacy.cli import download
        download('en')
        nlp = spacy.load('en')