Beispiel #1
0
def check_and_install_transliteration(force=False):
    """Download polyglot transliteration packages that are not yet present.

    Every language code from the module-level ``languages`` collection
    except ``"en"`` is considered. Unless ``force`` is set, a language is
    skipped when its archive
    ``<polyglot_path>/transliteration2/<code>/transliteration.<code>.tar.bz2``
    already exists on disk.

    Args:
        force: When true, download the package for every candidate language
            without checking the disk first.
    """
    # 'en' is not a supported transliteration language
    candidates = [entry.code for entry in languages if entry.code != "en"]

    if force:
        install_needed = candidates
    else:
        packages_root = Path(polyglot.polyglot_path) / "transliteration2"
        install_needed = [
            code for code in candidates
            if not (packages_root / code /
                    f"transliteration.{code}.tar.bz2").exists()
        ]

    # Nothing missing: stay silent and skip creating a Downloader.
    if not install_needed:
        return

    print(
        f"Installing transliteration models for the following languages: {', '.join(install_needed)}"
    )

    # Imported lazily, only when a download is actually required.
    from polyglot.downloader import Downloader
    fetcher = Downloader()

    for code in install_needed:
        fetcher.download(f"transliteration2.{code}")
def ner():
    """Extract named entities from the JSON request body, grouped by tag.

    The request JSON must be an object with a ``text`` string. An optional
    ``lang`` value overrides the language of the analysed text; it must be
    one of the languages listed in the downloader's ``ner2`` collection.

    Returns:
        A ``(response, 200)`` tuple. The JSON body maps each entity tag to
        the sorted list of distinct entity strings found for that tag, or
        is an empty object when the text is blank after stripping.

    Aborts with HTTP 400 when the body is missing, empty, not a JSON
    object, lacks ``text``, carries a non-string ``text``, or names an
    unsupported ``lang``.
    """
    payload = request.json
    # Only a JSON object can carry a 'text' key; arrays and scalars used to
    # slip through the membership test and fail later with a 500.
    if not payload or not isinstance(payload, dict) or 'text' not in payload:
        abort(400)

    try:
        input_text = payload['text'].strip()
    except AttributeError:
        # 'text' was not a string (e.g. a number, list or null).
        abort(400)

    if input_text == '':
        return jsonify({}), 200

    text = Text(input_text)

    if 'lang' in payload:
        downloader = Downloader(download_dir=polyglot.data_path+'/polyglot_data')
        supported_languages = [x.language for x in downloader.get_collection(task="ner2").packages]
        lang_ = payload['lang']
        if lang_ not in supported_languages:
            abort(400, {'message': 'language {} is not supported'.format(lang_)})

        text.language = lang_

    # Collect distinct mentions per tag; each entity is a sequence of words.
    mentions_by_tag = {}
    for entity in text.entities:
        mentions_by_tag.setdefault(entity.tag, set()).add(' '.join(entity))

    # Sets are not JSON-serialisable; sorting also makes the response stable
    # across runs instead of depending on set iteration order.
    result = {tag: sorted(mentions) for tag, mentions in mentions_by_tag.items()}

    return jsonify(result), 200
Beispiel #3
0
def status():
    """Report which optional libraries are missing and which polyglot models exist.

    Starts from a shallow copy of the module-level ``default_data`` and adds:

    * ``missing_libraries``: names of the probed libraries whose import
      raised ``ImportError``, in probe order.
    * ``polyglot_lang_models``: only when polyglot imports successfully; maps
      the id of every downloader collection whose id starts with ``LANG:``
      and whose status is not ``'not installed'`` to that status.

    Returns:
        The result of ``jsonify`` applied to the assembled dictionary.
    """
    # Local import keeps this block self-contained.
    import importlib

    data = dict(default_data)
    data['missing_libraries'] = []

    # Probe order is preserved in the reported list.
    optional_libraries = (
        'textblob',
        'spacy',
        'gensim',
        'newspaper',
        'langid',
        'readability',
        'bs4',
        'afinn',
        'polyglot',
    )
    for library in optional_libraries:
        try:
            importlib.import_module(library)
        except ImportError:
            data['missing_libraries'].append(library)

    if 'polyglot' not in data['missing_libraries']:
        from polyglot.downloader import Downloader
        dwnld = Downloader()
        data['polyglot_lang_models'] = {}

        for info in sorted(dwnld.collections(), key=str):
            # Named model_status so it does not shadow this function.
            model_status = dwnld.status(info)
            if info.id.startswith('LANG:') and model_status != 'not installed':
                data['polyglot_lang_models'][info.id] = model_status

    return jsonify(data)
Beispiel #4
0
def download(args):
    """Download polyglot packages and models.

    Args:
        args: Parsed command-line options. The attributes read here are
            ``server_index_url``, ``packages``, ``dir``, ``quiet``,
            ``force`` and ``halt_on_error``.

    When ``args.packages`` is empty, a single download call is made without
    a package id. Otherwise each listed package is downloaded in turn, and
    the loop stops at the first failed download if ``halt_on_error`` is set.
    """
    downloader = Downloader(server_index_url=args.server_index_url)

    # Options shared by every download call below.
    options = dict(download_dir=args.dir, quiet=args.quiet, force=args.force,
                   halt_on_error=args.halt_on_error)

    if not args.packages:
        downloader.download(**options)
        return

    for package in args.packages:
        outcome = downloader.download(info_or_id=unicode(package), **options)
        # Equality (not identity) is kept on purpose to match the existing check.
        if args.halt_on_error and outcome == False:
            break
Beispiel #5
0
def _download_polyglot_data():
    """Download the pt and es polyglot embeddings and NER packages."""
    # Embeddings (used for mention detection) come first, then the NER models.
    package_ids = (
        'embeddings2.pt',
        'embeddings2.es',
        'ner2.pt',
        'ner2.es',
    )

    fetcher = Downloader()
    for package_id in package_ids:
        fetcher.download(package_id)
Beispiel #6
0
def polyglot_default_install():
    """Check that the configured polyglot packages are installed, then exercise them.

    The data directory comes from ``POLIGLOT['path_polyglot_data']`` and is
    created if absent. For each language code in ``SERVER['language']`` and
    each task in ``embeddings2``, ``ner2``, ``sentiment2``, ``morph2`` and
    ``pos2`` that ``get_supported_languages`` lists for that language, the
    package ``<task>.<language>`` must already be installed. Languages with
    sentiment, NER or morphology support are then run once on a sample text,
    presumably so the models are loaded at server start rather than on the
    first request.

    Raises:
        EnvironmentError: If a required package is missing or any other
            ``Exception`` occurs. The message is the original error text
            followed by ``'TracebackError'`` and the formatted traceback.
            ``KeyboardInterrupt`` and ``SystemExit`` are no longer wrapped
            and propagate unchanged.
    """
    # Tasks verified for every language, in this order.
    required_tasks = ('embeddings2', 'ner2', 'sentiment2', 'morph2', 'pos2')

    try:
        # NOTE(review): this rebinds os.path.sep for the whole process, not
        # just this function; confirm it is still required.
        os.path.sep = '/'
        polyglot_path = tools.get_abs_path(POLIGLOT['path_polyglot_data'])

        if not os.path.exists(polyglot_path):
            os.makedirs(polyglot_path)

        load.polyglot_path = polyglot_path

        downloader = Downloader(download_dir=polyglot_path)
        for language in SERVER['language']:
            full_name = Language.from_code(language).name

            available = set()
            for task in required_tasks:
                if language not in get_supported_languages(task):
                    continue
                available.add(task)
                if downloader.is_installed(task + '.' + language):
                    continue

                # Only the embeddings message includes the language code;
                # the wording of each message is kept exactly as before.
                if task == 'embeddings2':
                    subject = '{0}({1})'.format(full_name, language)
                else:
                    subject = full_name
                raise EnvironmentError(
                    'The {0} module for {1} was not found, to install this package,'
                    ' run "./install/install_polyglot.py"'.format(
                        task, subject))

            sentiment = 'sentiment2' in available
            entities = 'ner2' in available
            morph = 'morph2' in available

            # FOR POLYGLOT DOWNLOAD ON START SERVER
            # pos2 is verified above but has no warm-up step here.
            if sentiment or entities or morph:
                text_polyglot = Text('Testing and cashing',
                                     hint_language_code=language)
                if sentiment:
                    _ = text_polyglot.words[0].polarity
                    _ = text_polyglot.sentences[0].polarity
                if entities:
                    _ = text_polyglot.entities
                if morph:
                    _ = text_polyglot.morphemes

    except Exception as ex:
        # Re-raise as EnvironmentError with the traceback embedded in the
        # message, and keep the original exception attached as the cause.
        raise EnvironmentError(
            str(ex) + 'TracebackError' + traceback.format_exc()) from ex