Code Example #1
File: spacy_processors.py  Project: williamwhe/forte
    def set_up(self):
        try:
            self.nlp = spacy.load(self.lang_model)
        except OSError:
            from spacy.cli.download import download
            download(self.lang_model)
            self.nlp = spacy.load(self.lang_model)
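Most of the examples on this page follow the same lazy-download pattern: call spacy.load(), and if the model package is missing (spaCy raises OSError), download it once and retry. A minimal, self-contained sketch of that pattern, with "en_core_web_sm" used only as a placeholder model name:

import spacy
from spacy.cli.download import download


def load_with_download(model_name: str = "en_core_web_sm"):
    """Load a spaCy model, downloading the package first if it is missing."""
    try:
        return spacy.load(model_name)
    except OSError:
        # The model package is not installed yet: fetch it and retry once.
        download(model_name)
        return spacy.load(model_name)


nlp = load_with_download()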
Code Example #2
    def _load_lang_model(self):
        # pylint: disable=import-outside-toplevel
        # download ScispaCy model using URL
        if self.lang_model in SCISPACYMODEL_URL:
            import subprocess
            import sys
            import os
            import importlib

            download_url = SCISPACYMODEL_URL[self.lang_model]
            command = [sys.executable, "-m", "pip", "install"] + [download_url]
            subprocess.run(command,
                           env=os.environ.copy(),
                           encoding="utf8",
                           check=False)

            cls = importlib.import_module(self.lang_model)
            self.nlp = cls.load()

        else:  # use spaCy download
            try:
                self.nlp = spacy.load(self.lang_model)
            except OSError:
                download(self.lang_model)
                self.nlp = spacy.load(self.lang_model)
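The ScispaCy branch above installs a model straight from a pip-installable URL and then imports the installed package, instead of going through spacy download. A hedged, standalone sketch of that idea; the package name and URL in the commented call are placeholders, not entries taken from SCISPACYMODEL_URL:

import importlib
import os
import subprocess
import sys


def pip_install_and_load(package_name: str, download_url: str):
    """Install a model distribution from a URL with pip, then import and load it."""
    subprocess.run(
        [sys.executable, "-m", "pip", "install", download_url],
        env=os.environ.copy(),
        check=True,
    )
    return importlib.import_module(package_name).load()


# Placeholder values, for illustration only:
# nlp = pip_install_and_load("en_core_sci_sm", "https://example.com/en_core_sci_sm.tar.gz")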
Code Example #3
    def _setup(args):
        try:
            import spacy
            from spacy.cli.download import download
        except ImportError:
            print('error: required package "spacy" is not installed', file=sys.stderr)
            exit(1)

        if not args:
            print('error: you must pass a list of two-letter ISO 639-1 language codes to install the respective '
                  'language models or the string "all" to install all available language models', file=sys.stderr)
            exit(2)
        else:
            try:
                args.pop(args.index('--no-update'))
                no_update = True
            except ValueError:
                no_update = False

            if args == ['all']:
                install_languages = list(DEFAULT_LANGUAGE_MODELS.keys())
            else:
                install_languages = []
                for arg in args:
                    install_languages.extend([l for l in map(str.strip, arg.split(',')) if l])

        print('checking if required spaCy data packages are installed...')

        try:
            piplist_str = subprocess.check_output([sys.executable, '-m', 'pip', 'list',
                                                   '--disable-pip-version-check',
                                                   '--format', 'json'])
        except subprocess.CalledProcessError as exc:
            print('error: calling pip failed with the following error message\n' + str(exc), file=sys.stderr)
            exit(3)

        piplist = json.loads(piplist_str)
        installed_pkgs = set(item['name'] for item in piplist)
        model_pkgs = dict(zip(DEFAULT_LANGUAGE_MODELS.keys(),
                              map(lambda x: x.replace('_', '-') + '-sm', DEFAULT_LANGUAGE_MODELS.values())))

        for lang in install_languages:
            if lang not in DEFAULT_LANGUAGE_MODELS.keys():
                print('error: no language model for language code "%s"' % lang, file=sys.stderr)
                exit(4)

            lang_model_pkg = model_pkgs[lang]

            if no_update and lang_model_pkg in installed_pkgs:
                print('language model package "%s" for language code "%s" is already installed -- skipping'
                      % (lang_model_pkg, lang))
                continue

            lang_model = DEFAULT_LANGUAGE_MODELS[lang] + '_sm'
            print('installing language model "%s" for language code "%s"...' % (lang_model, lang))
            download(lang_model)

        print('done.')
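For context, DEFAULT_LANGUAGE_MODELS is not shown in this excerpt; from the way it is used above, it maps two-letter ISO 639-1 codes to base model names, with "_sm" appended at install time and underscores turned into hyphens for the pip package name. A hypothetical entry might look like this:

# Hypothetical shape of DEFAULT_LANGUAGE_MODELS assumed by the code above.
DEFAULT_LANGUAGE_MODELS = {
    "en": "en_core_web",   # installs "en_core_web_sm" (pip package "en-core-web-sm")
    "de": "de_core_news",  # installs "de_core_news_sm" (pip package "de-core-news-sm")
}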
Code Example #4
File: spacy_features.py  Project: flippersmcgee/Enso
    def load(self):
        """
        If the pre-trained `en_vectors_web_lg` model is not already stored on disk,
        it is downloaded automatically as part of :func:`load()`. Note that the
        download may require elevated permissions depending on how your Python
        packages are installed.
        """
        try:
            self.nlp = spacy.load('en_vectors_web_lg')
        except OSError:
            download('en_vectors_web_lg')
            self.nlp = spacy.load('en_vectors_web_lg')
Code Example #5
    def add_language(self, lang):
        print('Loading the %s model' % lang)
        lang_model = self.lang_dict.get(lang, lang)
        if lang == "ja":
            import ja_sudachipy
            self.nlp_dict[lang] = ja_sudachipy.Japanese().load(lang_model)
            return
        if info():
            if lang_model not in info()['Models'] and lang in self.lang_dict:
                download(lang_model)
        self.nlp_dict[lang] = spacy.load(lang_model)
Code Example #6
File: __main__.py  Project: qualichat/qualichat
def setup(parser: ArgumentParser, args: Namespace) -> None:
    api_key = password('Enter your Google API key:').ask()

    config['google_api_key'] = api_key
    config.save()

    with progress_bar(transient=True) as progress:
        progress.add_task('[green]Downloading spaCy models[/]', start=False)
        download('pt_core_news_md', False, False, '-q')
        download('en_core_web_sm', False, False, '-q')

    print('\n[green]✔ You can now use Qualichat.[/green]')
Code Example #7
File: ner.py  Project: vishalbelsare/ner-d
def download_model(model_name: str):
    """
    Downloads and links a trained language model.

        >>> from nerd import ner
        >>> ner.download_model(model_name='en_core_web_sm')

    Supported models: 'en_core_web_sm', 'de_core_news_sm', 'fr_core_news_sm',
    'es_core_news_sm', 'pt_core_news_sm', 'it_core_news_sm',
    'nl_core_news_sm', 'el_core_news_sm', 'xx_ent_wiki_sm'.

    :param model_name: Model package name.
    :type model_name: str
    """

    download(model_name)
    package_path = get_package_path(model_name)
    link(model_name, model_name, force=True, model_path=package_path)
Code Example #8
def main():
    # checks that the language add-ons required by nltk are present
    try:
        nltk.tokenize.word_tokenize('Existe nltk punkt')
    except LookupError:
        nltk.download('punkt')

    try:
        spacy.load('pt')
    except IOError:
        download('pt')

    config = Config('data/configuration/', 'config.json')
    # runs the main functions of each class, reading the input files and building the model
    parser = run_data_parse(config)
    #model = run_model(config)

    # saves the key information about the dataset
    create_dataset_info(parser)
Code Example #9
def load_model(
    *,
    name_or_nlp: str | Language,
    vocab: Union[Vocab, bool] = True,
    disable: Iterable[str] = None,
    exclude: Iterable[str] = None,
    keep_hyphens: bool = False,
    remove_whitespace_ents: bool = False,
) -> Language:

    if remove_whitespace_ents:
        Language.factories['remove_whitespace_entities'] = lambda _nlp, **_cfg: remove_whitespace_entities

    args: dict = skip_none_values(dict(vocab=vocab, disable=disable, exclude=exclude))

    if isinstance(name_or_nlp, Language):
        return name_or_nlp

    if isinstance(name_or_nlp, str):
        try:
            nlp: Language = load(name_or_nlp, **args)
        except OSError:
            logger.info(f"not found: {name_or_nlp}, downloading...")
            download(name_or_nlp)
            nlp: Language = load(name_or_nlp, **args)

            # try:
            #     name: Union[str, Language] = prepend_spacy_path(name_or_nlp)
            #     nlp: Language = load(name, **args)
            # except OSError:
            #     ...

    if keep_hyphens:
        nlp.tokenizer = keep_hyphen_tokenizer(nlp)

    return nlp
Code Example #10
File: command_line.py  Project: megagonlabs/ginza
def run(
    model_path: Optional[str] = None,
    ensure_model: Optional[str] = None,
    split_mode: Optional[str] = None,
    hash_comment: str = "print",
    output_path: Optional[Path] = None,
    output_format: str = "0",
    require_gpu: int = -1,
    disable_sentencizer: bool = False,
    use_normalized_form: bool = False,
    parallel_level: int = 1,
    files: List[str] = None,
):
    if output_format in ["3", "json"] and hash_comment != "analyze":
        print(
            f'hash_comment="{hash_comment}" not permitted for JSON output. Forced to use hash_comment="analyze".',
            file=sys.stderr)

    assert parallel_level == 1 or require_gpu == -1, "require_gpu not allowed for multi-processing. https://github.com/explosion/spaCy/issues/5507"

    if parallel_level <= 0:
        level = max(1, cpu_count() + parallel_level)
        if output_format in ["2", "mecab"]:
            if require_gpu >= 0:
                print("GPU not used for mecab mode", file=sys.stderr)
                require_gpu = -1
        elif parallel_level <= 0:
            if require_gpu >= 0:
                if level < 4:
                    print(
                        f"GPU #{require_gpu} enabled: parallel_level' set to {level}",
                        end="",
                        file=sys.stderr)
                else:
                    print(
                        f"GPU #{require_gpu} enabled: parallel_level' set to {level} but seems it's too much",
                        end="",
                        file=sys.stderr)
            else:
                print(f"'parallel_level' set to {level}", file=sys.stderr)
        elif require_gpu:
            print(f"GPU #{require_gpu} enabled", file=sys.stderr)
        parallel_level = level

    assert model_path is None or ensure_model is None
    if ensure_model:
        ensure_model = ensure_model.replace("-", "_")
        try:
            from importlib import import_module
            import_module(ensure_model)
        except ModuleNotFoundError:
            if GINZA_MODEL_PATTERN.match(ensure_model):
                print("Installing", ensure_model, file=sys.stderr)
                import pip
                pip.main(["install", ensure_model])
                print("Successfully installed", ensure_model, file=sys.stderr)
            elif SPACY_MODEL_PATTERN.match(ensure_model):
                print("Installing", ensure_model, file=sys.stderr)
                from spacy.cli.download import download
                download(ensure_model)
                print("Successfully installed", ensure_model, file=sys.stderr)
            else:
                raise OSError(
                    "E050",
                    f'You need to install "{ensure_model}" before executing ginza.'
                )
        model_name_or_path = ensure_model
    else:
        model_name_or_path = model_path

    analyzer = Analyzer(
        model_name_or_path,
        split_mode,
        hash_comment,
        output_format,
        require_gpu,
        disable_sentencizer,
        use_normalized_form,
    )

    output = _OutputWrapper(output_path, output_format)
    output.open()
    try:
        if not files and sys.stdin.isatty():
            _analyze_tty(analyzer, output)
        else:
            if not files:
                files = [0]
            if parallel_level == 1:
                _analyze_single(analyzer, output, files)
            else:
                _analyze_parallel(analyzer, output, files, parallel_level)
    finally:
        output.close()
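An illustrative call to the run() function above: ensure_model accepts a GiNZA or spaCy model name, hyphens are normalized to underscores, and the model is installed on demand via pip or spacy.cli.download.download when it is not importable yet. "ja_ginza" and "input.txt" below are placeholder values.

# Illustrative only; assumes the run() defined above is importable.
run(
    ensure_model="ja_ginza",   # installed on demand if the module is missing
    files=["input.txt"],
)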
Code Example #11
def download_model_by_name(*, model_name: str):
    download(model_name)
Code Example #12
File: install_modules.py  Project: chafa618/frn
#!/usr/bin/env python
# coding: utf-8

from spacy.cli.download import download, link

download(model='en_core_web_sm')
link(origin='en_core_web_sm', link_name='en')

import nltk

nltk.download('punkt')
Code Example #13
import string
from chatterbot import languages
import spacy
from spacy.cli.download import download
download(model="en_core_web_sm")


class LowercaseTagger(object):
    """
    Returns the text in lowercase.
    """
    def __init__(self, language=None):
        self.language = language or languages.ENG

    def get_text_index_string(self, text):
        return text.lower()


class PosLemmaTagger(object):
    def __init__(self, language=None):
        import spacy

        self.language = language or languages.ENG

        self.punctuation_table = str.maketrans(
            dict.fromkeys(string.punctuation))

        if self.language.ISO_639_1.lower() == 'en':
            self.nlp = spacy.load('en_core_web_sm')
        else:
            self.nlp = spacy.load(self.language.ISO_639_1.lower())
Code Example #14
import numpy as np
import spacy
from spacy.tokens import Doc


class NoTokenizer(object):
    def __init__(self, vocab):
        self.vocab = vocab

    def __call__(self, tokens):
        spaces = [True] * len(tokens)
        return Doc(self.vocab, words=tokens, spaces=spaces)


try:
    lemmatizer = spacy.load('en', disable=['parser', 'ner'])
except OSError:
    # Assume the problem was the spacy models were not downloaded
    from spacy.cli.download import download
    download('en')
    lemmatizer = spacy.load('en', disable=['parser', 'ner'])
lemmatizer.tokenizer = NoTokenizer(lemmatizer.vocab)


def get_sparse_prob_indices(probs, alpha=0.0):
    """
    Here alpha is used to make a probability distribution sparse by
    limiting the maximum allowed relative decay between sorted
    probabilities
    """

    if probs.shape[0] == 1:
        return np.array([0])

    # sort the probabilities and compute the ratio from larger to smaller
Code Example #15
File: engines.py  Project: orestisfl/r4a-nao-nlp
    def init(
        self,
        snips_path: Optional[str] = os.path.join(HERE, "engine.tar.gz"),
        transformations: Optional[str] = os.path.join(HERE,
                                                      "transformations.json"),
        srl_predictor_path: Optional[
            str] = "https://s3-us-west-2.amazonaws.com/allennlp/models/srl-model-2018.05.25.tar.gz",
        spacy_lang: Optional[str] = "en_core_web_md",
        neuralcoref: bool = True,
        core_nlp_server_url: Optional[str] = "http://localhost:9000",
    ) -> None:
        logger.debug("Initializing shared resources")

        if core_nlp_server_url:
            logger.debug("Connecting to stanford CoreNLP server %s",
                         core_nlp_server_url)
            import requests

            try:
                requests.head(core_nlp_server_url).ok
                self._core_nlp_server_url = core_nlp_server_url
            except IOError:
                logger.exception("During HEAD request:")
                logger.warn(
                    "Failed to load CoreNLP server %s, make sure it is live and ready,"
                    " continuing without it.",
                    core_nlp_server_url,
                )

        if srl_predictor_path:
            logger.debug("Initiating allennlp SRL server with model from %s",
                         srl_predictor_path)
            from multiprocessing import Process, Queue

            self._srl_qi = Queue()
            self._srl_qo = Queue()
            self._srl_count = 0

            p = Process(
                target=_predictor_server,
                args=(srl_predictor_path, self._srl_qi, self._srl_qo),
                daemon=True,
            )
            p.start()

            @atexit.register
            def cleanup():
                logger.debug("Calling cleanup")
                self._srl_qi.put(None, timeout=0.5)
                self._srl_qi.close()
                self._srl_qo.close()
                p.join(timeout=5)
                if p.is_alive():
                    logger.error("Killing SRL server")
                    p.kill()

        if snips_path:
            logger.debug("Loading snips engine from %s", snips_path)
            from snips_nlu import SnipsNLUEngine

            if os.path.isdir(snips_path):
                logger.debug("%s is a directory, loading directly", snips_path)
                self._engine = SnipsNLUEngine.from_path(snips_path)
            else:
                with tarfile.open(snips_path, "r:gz") as archive:
                    with TemporaryDirectory() as tmp:
                        archive.extractall(tmp)
                        logger.debug(
                            "Extracted to temporary dir %s, loading from there",
                            tmp)
                        self._engine = SnipsNLUEngine.from_path(
                            os.path.join(tmp, "engine"))

        if transformations:
            logger.debug("Loading transformations file from %s",
                         transformations)

            with open(transformations) as f:
                self._transformations = json.load(f)

        if spacy_lang:
            logger.debug("Loading spacy lang %s", spacy_lang)
            try:
                module = importlib.import_module(spacy_lang)
            except ModuleNotFoundError:
                from spacy.cli.download import download

                download(spacy_lang)
                module = importlib.import_module(spacy_lang)

            self._spacy = module.load()

        if neuralcoref:
            if self._spacy is None:
                raise ValueError(
                    "neuralcoref is set but no spacy model is loaded")

            import neuralcoref

            neuralcoref.add_to_pipe(self._spacy)

        if self._spacy and self._core_nlp_server_url:
            from spacy.tokens import Token

            Token.set_extension("quote", default=None, force=True)
Code Example #16
    def set_up(self):
        try:
            self.nlp = spacy.load(self.lang_model)
        except OSError:
            download(self.lang_model)
            self.nlp = spacy.load(self.lang_model)
Code Example #17
    # Hyperparameters
    ## number of entities to save for each article
    amount_of_entities = 3
    # Name of the csv file with the data
    PATH_LABELED_DATA = './data_files/labeled_data.csv'
    PATH_UNLABELED_DATA = './data_files/data.csv'
    # Index name for Elasticsearch
    INDEX = 'labeled-news-english'
    # length of the character window around each entity
    char_length = 20

    # logging.basicConfig(level=logging.ERROR)

    # checks whether the model is already present in the container and downloads it if needed
    download('en_core_web_sm')
    # loads the spaCy model
    nlp = spacy.load('en_core_web_sm')

    # connects to elasticsearch
    es = connect_elasticsearch()

    # ensures that the connection succeeded before performing the following steps
    if es is not None:

        # loads the data set from which the entities should be taken
        dataset = get_labeled_data(PATH_LABELED_DATA)

        # extracts the configured number of entities for each article
        entities = NER_spacy(dataset, amount_of_entities, char_length)
Code Example #18
File: spacy.py  Project: aarnphm/BentoML
def load(
    tag: t.Union[str, Tag],
    model_store: "ModelStore" = Provide[BentoMLContainer.model_store],
    vocab: t.Union["Vocab", bool] = True,  # type: ignore[reportUnknownParameterType]
    disable: t.Sequence[str] = tuple(),
    exclude: t.Sequence[str] = tuple(),
    config: t.Union[t.Dict[str, t.Any], "Config", None] = None,
) -> "spacy.language.Language":
    """
    Load a model from BentoML local modelstore with given name.

    Args:
        tag (:code:`Union[str, Tag]`):
            Tag of a saved model in BentoML local modelstore.
        model_store (:mod:`~bentoml._internal.models.store.ModelStore`, default to :mod:`BentoMLContainer.model_store`):
            BentoML modelstore, provided by DI Container.
        vocab (:code:`Union[spacy.vocab.Vocab, bool]`, `optional`, defaults to `True`):
            Optional vocab to pass in on initialization. If True, a new Vocab object will be created.
        disable (`Sequence[str]`, `optional`):
            Names of pipeline components to disable.
        exclude (`Sequence[str]`, `optional`):
            Names of pipeline components to exclude. Excluded
            components won't be loaded.
        config (:code:`Union[Dict[str, Any], spacy.Config]`, `optional`):
            Config overrides as nested dict or dict
            keyed by section values in dot notation.

    Returns:
        :obj:`spacy.language.Language`: an instance of :obj:`spacy.Language` from BentoML modelstore.

    Examples:

    .. code-block:: python

        import bentoml

        model = bentoml.spacy.load('custom_roberta')
    """
    model = model_store.get(tag)
    if model.info.module not in (MODULE_NAME, __name__):
        raise BentoMLException(
            f"Model {tag} was saved with module {model.info.module}, failed loading with {MODULE_NAME}."
        )

    if "projects_uri" in model.info.options:
        raise BentoMLException(
            "Cannot use `bentoml.spacy.load()` to load Spacy Projects. Use"
            " `bentoml.spacy.load_project()` instead."
        )
    required = model.info.options["pip_package"]

    try:
        _ = importlib.import_module(required)
    except ModuleNotFoundError:
        try:
            from spacy.cli.download import download

            # TODO: move this to runner on startup hook
            download(required)
        except (SystemExit, Exception):  # pylint: disable=broad-except
            logger.warning(
                f"{required} cannot be downloaded as pip package. If this"
                " is a custom pipeline there is nothing to worry about."
                " If this is a pretrained model provided by Explosion make"
                " sure that you save the correct package and model to BentoML"
                " via `bentoml.spacy.save()`"
            )
    try:
        # check if pipeline has additional requirements then all related
        # pip package has been installed correctly.
        additional = model.info.options["additional_requirements"]
        not_existed = list()  # type: t.List[str]
        dists = packages_distributions()
        for module_name in additional:
            mod, _ = split_requirement(module_name)
            if mod not in dists:
                not_existed.append(module_name)
            if len(not_existed) > 0:
                raise MissingDependencyException(
                    f"`{','.join(not_existed)}` is required by `{tag}`."
                )
    except KeyError:
        pass
    import spacy.util

    return spacy.util.load_model(
        model.path,
        vocab=vocab,
        disable=disable,
        exclude=exclude,
        config=config if config else {},
    )
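The additional-requirements check near the end of this example relies on packages_distributions(), which maps top-level import names to installed distribution names. A minimal sketch of the same idea, assuming Python 3.10+ (where packages_distributions lives in importlib.metadata) and a naive requirement-string split in place of BentoML's split_requirement helper:

from importlib.metadata import packages_distributions


def missing_requirements(requirements):
    """Return the requirement strings whose top-level module is not installed."""
    dists = packages_distributions()  # import name -> list of distribution names
    missing = []
    for req in requirements:
        # Naive parse: keep only the module part of e.g. "torch>=1.9".
        module_name = req.split(">=")[0].split("==")[0].strip()
        if module_name not in dists:
            missing.append(req)
    return missing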
Code Example #19
import json
import ibm_watson
import db
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator

INTENT_DOCTOR = 'Register_Agent'
INTENT_COMMUNITY = 'Register_Community'

model_name = "en_core_web_sm"

import spacy
import csv
from spacy.cli.download import download
download(model_name)
#from spacy.cli import link
#from spacy.util import get_package_path
#package_path = get_package_path(model_name)
#link(model_name, model_name, force=True, package_path=package_path)

nlp = spacy.load('en_core_web_sm')

import logging
logging.getLogger("scapy.runtime").setLevel(logging.ERROR)

replytext = 'empty msg'

with open('config.json') as config_file:
    config = json.load(config_file)

authenticator = IAMAuthenticator(config["ibm_assistant"]["iam_apikey"])
Code Example #20
from flask import Flask
from chatterbot import ChatBot
from chatterbot.trainers import ChatterBotCorpusTrainer
import os
import spacy
from spacy.cli.download import download
download(model="en")

app = Flask(__name__)
chatterbot = ChatBot("Jarvis",
                     storage_adapter="chatterbot.storage.SQLStorageAdapter",
                     read_only=False,
                     database_uri=os.environ['DATABASE_URL'])

# To train with default english corpus

# chatterbot.set_trainer(ChatterBotCorpusTrainer)

# chatterbot.train(
#     "chatterbot.corpus.english"
# )

# Create a new trainer for the chatbot
trainer = ChatterBotCorpusTrainer(chatterbot)

# Train the chatbot based on the english corpus
trainer.train(
    "chatterbot.corpus.english.ai",
    "chatterbot.corpus.english.botprofile",
    "chatterbot.corpus.english.computers",
    "chatterbot.corpus.english.conversations",