Esempio n. 1
0
    def __init__(self, vdict_path, adict_path,
                 batchsize=128, max_length=15, n_ans_vocabulary=1000,
                 mode='train', data_shape=(2048)):
        """Store the settings, read both JSON vocabularies and load spaCy.

        Args:
            vdict_path: Path of a JSON file; its content becomes ``self.vdict``.
            adict_path: Path of a JSON file; its content becomes ``self.adict``.
            batchsize: Stored as ``self.batchsize``.
            max_length: Stored as ``self.max_length``.
            n_ans_vocabulary: Stored first, then overwritten with
                ``len(self.adict)`` once ``adict_path`` has been read.
            mode: Must equal ``'test'``; any other value (including the
                default ``'train'``) trips the assertion below.
            data_shape: Stored as ``self.data_shape``. Note that the default
                ``(2048)`` is the plain int 2048, not a one-element tuple.

        Raises:
            AssertionError: If ``mode`` is not ``'test'``.
        """
        def read_json(path):
            # Small local helper so both vocabulary files are read the same way.
            with open(path, 'r') as handle:
                return json.load(handle)

        self.batchsize = batchsize
        # Placeholders; nothing in this constructor fills them in.
        self.d_vocabulary = self.batch_index = self.batch_len = self.rev_adict = None
        self.max_length, self.n_ans_vocabulary = max_length, n_ans_vocabulary
        self.mode, self.data_shape = mode, data_shape

        assert self.mode == 'test'

        # Read both vocabulary files before publishing either on the instance.
        vocab = read_json(vdict_path)
        answers = read_json(adict_path)
        self.n_vocabulary = len(vocab)
        self.vdict = vocab
        self.n_ans_vocabulary = len(answers)
        self.adict = answers

        # Install the GloVe vectors package through sputnik, then load the
        # English pipeline with those vectors from the same data directory.
        vectors_name = 'en_glove_cc_300_1m_vectors'
        sputnik.install('spacy', spacy.about.__version__, vectors_name,
                        data_path=DATA_PATH)
        self.nlp = spacy.load('en', via=DATA_PATH, vectors=vectors_name)
        self.glove_dict = dict()  # word -> glove vector
Esempio n. 2
0
    def __init__(self, vdict_path, adict_path,
                 batchsize=128, max_length=15, n_ans_vocabulary=1000,
                 mode='train', data_shape=(2048)):
        """Record the configuration, load the two vocabularies and spaCy.

        Args:
            vdict_path: JSON file whose parsed content is kept as ``self.vdict``.
            adict_path: JSON file whose parsed content is kept as ``self.adict``.
            batchsize: Kept as ``self.batchsize``.
            max_length: Kept as ``self.max_length``.
            n_ans_vocabulary: Kept initially, then replaced by
                ``len(self.adict)`` after ``adict_path`` is parsed.
            mode: Only ``'test'`` passes the assertion; the default
                ``'train'`` does not.
            data_shape: Kept as ``self.data_shape``; the default ``(2048)``
                evaluates to the int 2048.

        Raises:
            AssertionError: If ``mode`` is not ``'test'``.
        """
        self.batchsize = batchsize
        # Attributes that start out empty; this constructor never sets them.
        self.d_vocabulary = None
        self.batch_index = None
        self.batch_len = None
        self.rev_adict = None
        self.max_length = max_length
        self.n_ans_vocabulary = n_ans_vocabulary
        self.mode = mode
        self.data_shape = data_shape

        assert self.mode == 'test'

        # Parse the vocabulary files in order: vdict first, then adict.
        parsed = []
        for json_path in (vdict_path, adict_path):
            with open(json_path, 'r') as stream:
                parsed.append(json.load(stream))
        vdict, adict = parsed

        self.n_vocabulary = len(vdict)
        self.vdict = vdict
        self.n_ans_vocabulary = len(adict)
        self.adict = adict

        # Make sure the GloVe vectors are installed, then load spaCy with them.
        glove_package = 'en_glove_cc_300_1m_vectors'
        sputnik.install('spacy', spacy.about.__version__, glove_package,
                        data_path=DATA_PATH)
        self.nlp = spacy.load('en', via=DATA_PATH, vectors=glove_package)
        self.glove_dict = dict()  # word -> glove vector
def _spacy_en():
    """Generator that loads the spaCy English model once and yields it forever.

    The first advance yields ``None`` without touching spaCy. The model is
    loaded on the second advance, and every later advance yields that same
    loaded object.

    If ``spacy.load`` raises the "Model not installed" ``RuntimeError``, the
    ``en_default`` model is installed through sputnik into ``data_path`` and
    the load is retried once.

    Raises:
        RuntimeError: Any ``RuntimeError`` from ``spacy.load`` whose text is
            not the "Model not installed" message is re-raised unchanged.
    """
    not_installed_msg = ("Model not installed. Please run 'python -m "
                         "spacy.en.download' to install latest compatible "
                         "model.")
    yield None
    try:
        spacyen = spacy.load('en_default', via=data_path)
    except RuntimeError as e:
        # ``e.message`` only exists on Python 2; on Python 3 reading it raises
        # AttributeError and hides the real error. ``str(e)`` gives the same
        # text on both versions.
        if str(e) != not_installed_msg:
            raise
        print("Need to download Spacy data. Starting download now")
        sputnik.install('spacy', spacy.about.__version__,
                        'en_default', data_path=data_path)
        spacyen = spacy.load('en_default', via=data_path)
    while True:
        yield spacyen
Esempio n. 4
0
def download(lang, force=False, fail_on_exist=True):
    """Install the model registered for ``lang`` through sputnik.

    Args:
        lang: Key looked up in ``about.__models__``; also interpolated into
            the hint messages.
        force: When true, purge the package data via ``sputnik.purge`` first.
        fail_on_exist: When true and ``sputnik.package`` finds the model,
            print a hint to stderr and exit with status 0.

    Exits with status 1 when the package still cannot be found after the
    install step.
    """
    if force:
        sputnik.purge(about.__title__, about.__version__)

    spec = (about.__title__, about.__version__, about.__models__[lang])
    not_found = (PackageNotFoundException, CompatiblePackageNotFoundException)

    # Probe for an existing installation; a lookup failure means "not there".
    try:
        sputnik.package(*spec)
    except not_found:
        pass
    else:
        if fail_on_exist:
            hint = ("Model already installed. Please run 'python -m "
                    "spacy.%s.download --force' to reinstall." % lang)
            print(hint, file=sys.stderr)
            sys.exit(0)

    sputnik.install(*spec)

    # Verify the install by looking the package up again.
    try:
        sputnik.package(*spec)
    except not_found:
        failure = ("Model failed to install. Please run 'python -m "
                   "spacy.%s.download --force'." % lang)
        print(failure, file=sys.stderr)
        sys.exit(1)

    print("Model successfully installed.", file=sys.stderr)
Esempio n. 5
0
def download(lang, force=False, fail_on_exist=True):
    """Install the model for ``lang`` through sputnik, replacing it on demand.

    Args:
        lang: Looked up in ``about.__models__``; when absent, ``lang`` itself
            is used as the model name. Also interpolated into the messages.
        force: When true and the model is already present, delete its
            directory (``pkg.path``) before reinstalling.
        fail_on_exist: When true (and ``force`` is false) and the model is
            already present, print a hint to stderr and exit with status 0.

    Exits with status 1 when the package cannot be found after installing.
    """
    spec = (about.__title__, about.__version__,
            about.__models__.get(lang, lang))
    not_found = (PackageNotFoundException, CompatiblePackageNotFoundException)

    try:
        existing = sputnik.package(*spec)
    except not_found:
        # Nothing installed yet: fall through to the install step.
        pass
    else:
        if force:
            shutil.rmtree(existing.path)
        elif fail_on_exist:
            hint = ("Model already installed. Please run 'python -m "
                    "spacy.%s.download --force' to reinstall." % lang)
            print(hint, file=sys.stderr)
            sys.exit(0)

    sputnik.install(*spec)

    # Confirm the install by resolving the package once more.
    try:
        sputnik.package(*spec)
    except not_found:
        failure = ("Model failed to install. Please run 'python -m "
                   "spacy.%s.download --force'." % lang)
        print(failure, file=sys.stderr)
        sys.exit(1)

    print("Model successfully installed to %s" % util.get_data_path(),
          file=sys.stderr)
Esempio n. 6
0
def main(force=False):
    """Install the default model through sputnik.

    Args:
        force: When true, purge the package data via ``sputnik.purge`` before
            checking whether the model is installed.

    Exits with status 1 both when the model is already installed and when it
    cannot be found after the install step.
    """
    if force:
        sputnik.purge(about.__title__, about.__version__)

    spec = (about.__title__, about.__version__, about.__default_model__)
    not_found = (PackageNotFoundException, CompatiblePackageNotFoundException)
    script = sys.argv[0]

    try:
        sputnik.package(*spec)
    except not_found:
        # Not installed yet, so carry on with the install.
        pass
    else:
        print("Model already installed. Please run '%s --force to reinstall." %
              script,
              file=sys.stderr)
        sys.exit(1)

    sputnik.install(*spec)

    # Look the package up again to confirm the install worked.
    try:
        sputnik.package(*spec)
    except not_found:
        print("Model failed to install. Please run '%s --force." % script,
              file=sys.stderr)
        sys.exit(1)

    print("Model successfully installed.", file=sys.stderr)
Esempio n. 7
0
File: download.py Progetto: yv/spaCy
def download(lang, force=False, fail_on_exist=True, data_path=None):
    """Install the model for ``lang`` through sputnik into ``data_path``.

    Args:
        lang: Looked up in ``about.__models__``; when absent, ``lang`` itself
            is used as the model name. Also interpolated into the messages.
        force: When true and the model is already present, delete its
            directory (``pkg.path``) before reinstalling.
        fail_on_exist: When true (and ``force`` is false) and the model is
            already present, print a hint to stderr and exit with status 0.
        data_path: Target directory; a falsy value selects
            ``util.get_data_path()``.

    Exits with status 1 when the package cannot be found after installing.
    """
    # spaCy works with pathlib and util.get_data_path returns a pathlib.Path,
    # whereas sputnik needs its data_path arguments as plain strings, hence
    # the str() coercion.
    data_path = str(data_path or util.get_data_path())

    spec = (about.__title__, about.__version__,
            about.__models__.get(lang, lang), data_path)
    not_found = (PackageNotFoundException, CompatiblePackageNotFoundException)

    try:
        existing = sputnik.package(*spec)
    except not_found:
        # Nothing installed yet: fall through to the install step.
        pass
    else:
        if force:
            shutil.rmtree(existing.path)
        elif fail_on_exist:
            hint = ("Model already installed. Please run 'python -m "
                    "spacy.%s.download --force' to reinstall." % lang)
            print(hint, file=sys.stderr)
            sys.exit(0)

    sputnik.install(*spec)

    # Confirm the install by resolving the package once more.
    try:
        sputnik.package(*spec)
    except not_found:
        failure = ("Model failed to install. Please run 'python -m "
                   "spacy.%s.download --force'." % lang)
        print(failure, file=sys.stderr)
        sys.exit(1)

    print("Model successfully installed to %s" % data_path, file=sys.stderr)
Esempio n. 8
0
def main(data_size='all', force=False):
    """Install the default model through sputnik, then run ``migrate``.

    Args:
        data_size: Accepted but not used anywhere in this function.
        force: When true, purge the package data via ``sputnik.purge`` before
            checking whether the model is installed.

    Exits with status 1 both when the model is already installed and when it
    cannot be found after the install step.
    """
    if force:
        sputnik.purge(about.__name__, about.__version__)

    spec = (about.__name__, about.__version__, about.__default_model__)
    not_found = (PackageNotFoundException, CompatiblePackageNotFoundException)

    try:
        sputnik.package(*spec)
    except not_found:
        # Not installed yet, so carry on with the install.
        pass
    else:
        print(
            "Model already installed. Please run 'python -m "
            "spacy.en.download --force' to reinstall.",
            file=sys.stderr)
        sys.exit(1)

    sputnik.install(*spec)

    # Look the package up again to confirm the install worked.
    try:
        sputnik.package(*spec)
    except not_found:
        print(
            "Model failed to install. Please run 'python -m "
            "spacy.en.download --force'.",
            file=sys.stderr)
        sys.exit(1)

    # FIXME clean up old-style packages
    here = os.path.dirname(os.path.abspath(__file__))
    migrate(here)

    print("Model successfully installed.", file=sys.stderr)
Esempio n. 9
0
def main(data_size='all', force=False):
    """Install the default model via sputnik and migrate old-style packages.

    Args:
        data_size: Accepted but never read by this function.
        force: When true, call ``sputnik.purge`` for this package before the
            "already installed" check.

    Exits with status 1 when the model is already installed, and also when it
    is still missing after the install step.
    """
    package_name, package_version = about.__name__, about.__version__
    not_found = (PackageNotFoundException, CompatiblePackageNotFoundException)

    if force:
        sputnik.purge(package_name, package_version)

    try:
        sputnik.package(package_name, package_version,
                        about.__default_model__)
    except not_found:
        already_installed = False
    else:
        already_installed = True

    if already_installed:
        print("Model already installed. Please run 'python -m "
              "spacy.en.download --force' to reinstall.", file=sys.stderr)
        sys.exit(1)

    sputnik.install(package_name, package_version, about.__default_model__)

    # A second lookup tells us whether the install actually succeeded.
    try:
        sputnik.package(package_name, package_version,
                        about.__default_model__)
    except not_found:
        print("Model failed to install. Please run 'python -m "
              "spacy.en.download --force'.", file=sys.stderr)
        sys.exit(1)

    # FIXME clean up old-style packages
    module_dir = os.path.dirname(os.path.abspath(__file__))
    migrate(module_dir)

    print("Model successfully installed.", file=sys.stderr)
Esempio n. 10
0
def download(lang, force=False, fail_on_exist=True):
    """Install the model for ``lang`` via sputnik, optionally replacing it.

    Args:
        lang: Key tried in ``about.__models__``; if missing, ``lang`` is used
            directly as the model name. Also shown in the hint messages.
        force: When true and a model is found, remove its directory
            (``pkg.path``) so it gets reinstalled.
        fail_on_exist: When true (and ``force`` is false) and a model is
            found, print a hint to stderr and exit with status 0.

    Exits with status 1 if the model is still missing after the install step.
    """
    model_name = about.__models__.get(lang, lang)
    not_found = (PackageNotFoundException, CompatiblePackageNotFoundException)

    def lookup():
        # Resolve the installed package; raises one of ``not_found`` if absent.
        return sputnik.package(about.__title__, about.__version__, model_name)

    try:
        installed = lookup()
    except not_found:
        pass
    else:
        if force:
            shutil.rmtree(installed.path)
        elif fail_on_exist:
            print("Model already installed. Please run 'python -m "
                  "spacy.%s.download --force' to reinstall." % lang,
                  file=sys.stderr)
            sys.exit(0)

    sputnik.install(about.__title__, about.__version__, model_name)

    # Check that the freshly installed package can now be resolved.
    try:
        lookup()
    except not_found:
        print("Model failed to install. Please run 'python -m "
              "spacy.%s.download --force'." % lang, file=sys.stderr)
        sys.exit(1)

    print("Model successfully installed to %s" % util.get_data_path(),
          file=sys.stderr)
Esempio n. 11
0
def _spacy_en():
    """Generator that loads the spaCy English model once and yields it forever.

    The first advance yields ``None`` without touching spaCy. The model is
    loaded on the second advance, and every later advance yields that same
    loaded object.

    If ``spacy.load`` raises the "Model not installed" ``RuntimeError``, the
    ``en_default`` model is installed through sputnik into ``data_path`` and
    the load is retried once.

    Raises:
        RuntimeError: Any ``RuntimeError`` from ``spacy.load`` whose text is
            not the "Model not installed" message is re-raised unchanged.
    """
    not_installed_msg = ("Model not installed. Please run 'python -m "
                         "spacy.en.download' to install latest compatible "
                         "model.")
    yield None
    try:
        spacyen = spacy.load('en_default', via=data_path)
    except RuntimeError as e:
        # ``e.message`` only exists on Python 2; on Python 3 reading it raises
        # AttributeError and hides the real error. ``str(e)`` gives the same
        # text on both versions.
        if str(e) != not_installed_msg:
            raise
        print("Need to download Spacy data. Starting download now")
        sputnik.install('spacy',
                        spacy.about.__version__,
                        'en_default',
                        data_path=data_path)
        spacyen = spacy.load('en_default', via=data_path)
    while True:
        yield spacyen
Esempio n. 12
0
def main(force=False):
    """Install the default model via sputnik.

    Args:
        force: When true, call ``sputnik.purge`` for this package before the
            "already installed" check.

    Exits with status 1 when the model is already installed, and also when it
    is still missing after the install step.
    """
    package_name, package_version = about.__name__, about.__version__
    not_found = (PackageNotFoundException, CompatiblePackageNotFoundException)

    if force:
        sputnik.purge(package_name, package_version)

    try:
        sputnik.package(package_name, package_version,
                        about.__default_model__)
    except not_found:
        already_installed = False
    else:
        already_installed = True

    if already_installed:
        print("Model already installed. Please run '%s --force to reinstall."
              % sys.argv[0], file=sys.stderr)
        sys.exit(1)

    sputnik.install(package_name, package_version, about.__default_model__)

    # A second lookup tells us whether the install actually succeeded.
    try:
        sputnik.package(package_name, package_version,
                        about.__default_model__)
    except not_found:
        print("Model failed to install. Please run '%s --force."
              % sys.argv[0], file=sys.stderr)
        sys.exit(1)

    print("Model successfully installed.", file=sys.stderr)
Esempio n. 13
0
def download(lang, force=False):
    """Install the model registered for ``lang`` through sputnik.

    Args:
        lang: Key looked up in ``about.__models__``; also interpolated into
            the hint messages.
        force: When true, purge the package data via ``sputnik.purge`` first.

    Exits with status 0 when the model is already installed, and with status
    1 when it cannot be found after the install step.
    """
    if force:
        sputnik.purge(about.__title__, about.__version__)

    spec = (about.__title__, about.__version__, about.__models__[lang])
    not_found = (PackageNotFoundException, CompatiblePackageNotFoundException)

    try:
        sputnik.package(*spec)
    except not_found:
        # Not installed yet, so carry on with the install.
        pass
    else:
        hint = ("Model already installed. Please run 'python -m "
                "spacy.%s.download --force' to reinstall." % lang)
        print(hint, file=sys.stderr)
        sys.exit(0)

    sputnik.install(*spec)

    # Look the package up again to confirm the install worked.
    try:
        sputnik.package(*spec)
    except not_found:
        failure = ("Model failed to install. Please run 'python -m "
                   "spacy.%s.download --force'." % lang)
        print(failure, file=sys.stderr)
        sys.exit(1)

    print("Model successfully installed.", file=sys.stderr)
This module helps us manage our memory resources because it provides
a singleton wrapper around the Spacy English object to prevent it from
being instantiated over and over again. You can access the spacy english
object by calling the function like so:

 spacy_singleton.spacy_en()(self._source_text)

"""
import sputnik
import spacy
import os

# Directory where the English model data is expected to live:
# <directory of the installed spacy package>/en/data.
data_path = os.path.join(os.path.dirname(spacy.__file__), 'en', 'data')
# Import-time side effect: if that directory does not exist, install the
# 'en_default' model into it through sputnik.
if not os.path.isdir(data_path):
    print("Need to download Spacy data. Starting download now")
    sputnik.install('spacy', spacy.about.__version__,
                    'en_default', data_path=data_path)


def _spacy_en():
    yield None
    try:
        spacyen = spacy.load('en_default', via=data_path)
    except RuntimeError as e:
        if e.message == "Model not installed. Please run 'python -m spacy.en.download' to install latest compatible model.":
            print("Need to download Spacy data. Starting download now")
            sputnik.install('spacy', spacy.about.__version__,
                            'en_default', data_path=data_path)
            spacyen = spacy.load('en_default', via=data_path)
        else:
            raise
    while True:
def _import_spacy():
    """Import spaCy, check its version and return a loaded English pipeline.

    Steps:
      1. Import ``spacy``; on failure log an error and re-raise.
      2. Compare ``spacy.about.__version__`` with the supported range. A
         version below the minimum raises ``RuntimeError``; a version above
         the maximum only logs a warning.
      3. Try ``spacy.en.English()``. If that fails, load the model from
         ``~/.graphlab/spacy_data`` through sputnik, installing it there
         first when the package cannot be loaded.

    Returns:
        The object returned by ``spacy.en.English(...)``.

    Raises:
        Exception: Whatever ``import spacy`` raised, re-raised after logging.
        RuntimeError: If the version check fails for any reason.
    """
    min_supported_spacy_version = '0.100.6'
    max_supported_spacy_version = '0.100.7'
    try:
        import spacy
    except Exception:
        _logging.getLogger(__name__).error(
            ("spaCy version {} or greater is required. Please" +
             " install spaCy via pip or conda. See https://spacy.io/" +
             " for more information.").format(min_supported_spacy_version))
        raise

    try:
        spacy_version = spacy.about.__version__
        if LooseVersion(spacy_version) < LooseVersion(
                min_supported_spacy_version):
            raise RuntimeError((
                "spaCy version is less than {}. Please upgrade to {}.").format(
                    min_supported_spacy_version, min_supported_spacy_version))
        if LooseVersion(spacy_version) > LooseVersion(
                max_supported_spacy_version):
            # ``Logger.warn`` is a deprecated alias; ``warning`` is the
            # supported spelling.
            _logging.getLogger(__name__).warning((
                "spaCy version is greater than {}. This version has not been tested and may not work"
            ).format(max_supported_spacy_version))

    # If the version attribute doesn't exist, the spaCy version is too old.
    # ``except Exception`` (not a bare ``except``) lets KeyboardInterrupt and
    # SystemExit propagate instead of being reported as a version problem.
    except Exception:
        raise RuntimeError(
            ("spaCy version is less than {}. Please upgrade to {}.").format(
                min_supported_spacy_version, min_supported_spacy_version))

    try:
        nlp = spacy.en.English()

    # Any ordinary failure falls back to the ~/.graphlab copy of the model;
    # an interrupt must not silently trigger a download.
    except Exception:
        data_path = _os.path.join(_os.path.expanduser("~"), ".graphlab",
                                  "spacy_data")
        import sputnik
        if LooseVersion(spacy_version) > LooseVersion(
                min_supported_spacy_version):
            model = spacy.about.__models__['en']
        else:
            model = spacy.about.__default_model__

        def load_packaged_model():
            # Resolve the sputnik package under data_path and build the
            # English pipeline from it, without word vectors.
            p = sputnik.package(spacy.about.__title__,
                                spacy.about.__version__,
                                model,
                                data_path=_os.path.join(data_path, ""))
            return spacy.en.English(package=p, load_vectors=False)

        try:
            nlp = load_packaged_model()
        except Exception:
            _logging.getLogger(__name__).info(
                "Downloading data for spacy to ~/.graphlab directory")
            sputnik.install(spacy.about.__title__,
                            spacy.about.__version__,
                            model,
                            data_path=data_path)
            nlp = load_packaged_model()

    return nlp
Esempio n. 16
0
a singleton wrapper around the Spacy English object to prevent it from
being instantiated over and over again. You can access the spacy english
object by calling the function like so:

 spacy_singleton.spacy_en()(self._source_text)

"""
import sputnik
import spacy
import os

# Directory where the English model data is expected to live:
# <directory of the installed spacy package>/en/data.
data_path = os.path.join(os.path.dirname(spacy.__file__), 'en', 'data')
# Import-time side effect: if that directory does not exist, install the
# 'en_default' model into it through sputnik.
if not os.path.isdir(data_path):
    print("Need to download Spacy data. Starting download now")
    sputnik.install('spacy',
                    spacy.about.__version__,
                    'en_default',
                    data_path=data_path)


def _spacy_en():
    yield None
    try:
        spacyen = spacy.load('en_default', via=data_path)
    except RuntimeError as e:
        if e.message == "Model not installed. Please run 'python -m spacy.en.download' to install latest compatible model.":
            print("Need to download Spacy data. Starting download now")
            sputnik.install('spacy',
                            spacy.about.__version__,
                            'en_default',
                            data_path=data_path)
            spacyen = spacy.load('en_default', via=data_path)