def check_and_install_transliteration(force=False):
    """Download polyglot transliteration packages that are not yet present.

    Every code found in ``languages`` except ``"en"`` is considered. Unless
    ``force`` is true, a language is skipped when the archive
    ``transliteration2/<code>/transliteration.<code>.tar.bz2`` already exists
    below ``polyglot.polyglot_path``.

    Args:
        force: when true, download the package for every considered
            language without looking at the disk first.
    """
    # 'en' is not a supported transliteration language
    candidates = [entry.code for entry in languages if entry.code != "en"]

    if force:
        pending = candidates
    else:
        # Keep only the languages whose archive is missing on disk.
        packages_dir = Path(polyglot.polyglot_path) / "transliteration2"
        pending = [
            code
            for code in candidates
            if not (packages_dir / code / f"transliteration.{code}.tar.bz2").exists()
        ]

    if not pending:
        return

    print(
        f"Installing transliteration models for the following languages: {', '.join(pending)}"
    )

    # Imported only once something actually has to be downloaded.
    from polyglot.downloader import Downloader

    fetcher = Downloader()
    for code in pending:
        fetcher.download(f"transliteration2.{code}")
def ner():
    """Return the named entities found in the request's ``text`` field.

    The JSON body must contain ``text``; an optional ``lang`` sets the
    language on the ``Text`` object instead of leaving it unset. The response
    maps each entity tag to a list of the distinct entity strings carrying
    that tag.

    Aborts with 400 when the body is empty, has no ``text`` key, or names a
    ``lang`` that is not among the languages of the ``ner2`` collection.
    Text that is empty after stripping yields an empty JSON object.
    """
    payload = request.json
    if not payload or 'text' not in payload:
        abort(400)

    input_text = payload['text'].strip()
    if input_text == '':
        return jsonify({}), 200

    text = Text(input_text)

    if 'lang' in payload:
        # Validate the requested language against the ner2 package list.
        downloader = Downloader(download_dir=polyglot.data_path + '/polyglot_data')
        ner_packages = downloader.get_collection(task="ner2").packages
        supported_languages = [package.language for package in ner_packages]
        lang_ = payload['lang']
        if lang_ not in supported_languages:
            abort(400, {'message': 'language {} is not supported'.format(lang_)})
        text.language = lang_

    # Group entity strings by tag; sets drop repeated mentions.
    grouped = {}
    for entity in text.entities:
        grouped.setdefault(entity.tag, set()).add(' '.join(entity))

    # Sets are not JSON serialisable, so hand lists to jsonify.
    response = {tag: list(names) for tag, names in grouped.items()}
    return jsonify(response), 200
def status():
    """Report which optional libraries are missing and which polyglot models exist.

    Starts from a shallow copy of ``default_data`` and adds:

    * ``missing_libraries`` - names of the probed modules whose import
      raised ``ImportError``, in probe order.
    * ``polyglot_lang_models`` - only when polyglot imports; maps the id of
      each ``LANG:`` collection whose downloader status is not
      ``'not installed'`` to that status.

    Returns the result through ``jsonify``.
    """
    probed_modules = (
        'textblob',
        'spacy',
        'gensim',
        'newspaper',
        'langid',
        'readability',
        'bs4',
        'afinn',
        'polyglot',
    )

    data = dict(default_data)
    missing = []
    data['missing_libraries'] = missing

    for module_name in probed_modules:
        try:
            __import__(module_name)
        except ImportError:
            missing.append(module_name)

    # The model listing is only possible when polyglot itself imported.
    if 'polyglot' not in missing:
        from polyglot.downloader import Downloader

        dwnld = Downloader()
        lang_models = {}
        data['polyglot_lang_models'] = lang_models
        for info in sorted(dwnld.collections(), key=str):
            state = dwnld.status(info)
            if info.id.startswith('LANG:') and state != 'not installed':
                lang_models[info.id] = state

    return jsonify(data)
def download(args):
    """Download polyglot packages and models.

    Args:
        args: parsed command-line options. Reads ``server_index_url``,
            ``packages``, ``dir``, ``quiet``, ``force`` and
            ``halt_on_error``.

    With no packages given, a single ``download`` call is made without a
    package id. Otherwise the packages are downloaded in order, stopping at
    the first one whose result equals ``False`` when ``halt_on_error`` is set.
    """
    downloader = Downloader(server_index_url=args.server_index_url)

    # Options shared by every download call below.
    options = dict(
        download_dir=args.dir,
        quiet=args.quiet,
        force=args.force,
        halt_on_error=args.halt_on_error,
    )

    if not args.packages:
        downloader.download(**options)
        return

    for pkg_id in args.packages:
        rv = downloader.download(info_or_id=unicode(pkg_id), **options)
        # Equality, not truthiness: a None result must not stop the loop.
        if rv == False and args.halt_on_error:
            break
def _download_polyglot_data():
    """Download the Portuguese and Spanish polyglot embeddings and NER models."""
    package_ids = (
        # PT and ES embeddings for mention detection
        'embeddings2.pt',
        'embeddings2.es',
        # NER models
        'ner2.pt',
        'ner2.es',
    )

    fetcher = Downloader()
    for package_id in package_ids:
        fetcher.download(package_id)
def polyglot_default_install():
    """Check that the polyglot packages for every configured language are installed.

    Points polyglot at the directory configured in
    ``POLIGLOT['path_polyglot_data']`` (created when missing). Then, for each
    language in ``SERVER['language']``, every task package (embeddings2,
    ner2, sentiment2, morph2, pos2) that ``get_supported_languages`` lists
    for that language must be installed. When sentiment, entity or morpheme
    support is available, the matching accessors are evaluated once on a
    short sample text - presumably so the models are loaded at server
    start rather than on the first request.

    Raises:
        EnvironmentError: when a required package is missing or any other
            ``Exception`` occurs. The message holds the original error text
            followed by ``'TracebackError'`` and the formatted traceback.
            ``KeyboardInterrupt`` and ``SystemExit`` are not intercepted.
    """
    tasks = ('embeddings2', 'ner2', 'sentiment2', 'morph2', 'pos2')

    try:
        # NOTE(review): this changes the separator for the whole process, not
        # just this function; kept because other code may rely on it.
        os.path.sep = '/'

        polyglot_path = tools.get_abs_path(POLIGLOT['path_polyglot_data'])
        if not os.path.exists(polyglot_path):
            os.makedirs(polyglot_path)
        load.polyglot_path = polyglot_path

        downloader = Downloader(download_dir=polyglot_path)

        for language in SERVER['language']:
            full_name = Language.from_code(language).name

            # Tasks that are both supported for this language and installed.
            available = set()
            for task in tasks:
                if language not in get_supported_languages(task):
                    continue
                if not downloader.is_installed(task + '.' + language):
                    # Only the embeddings message carries the language code.
                    if task == 'embeddings2':
                        subject = '{0}({1})'.format(full_name, language)
                    else:
                        subject = full_name
                    raise EnvironmentError(
                        'The {0} module for {1} was not found, to install this package,'
                        ' run "./install/install_polyglot.py"'.format(task, subject))
                available.add(task)

            sentiment = 'sentiment2' in available
            entities = 'ner2' in available
            morph = 'morph2' in available

            # FOR POLYGLOT DOWNLOAD ON START SERVER
            if sentiment or entities or morph:
                text_polyglot = Text('Testing and cashing', hint_language_code=language)
                if sentiment:
                    _ = text_polyglot.words[0].polarity
                    _ = text_polyglot.sentences[0].polarity
                if entities:
                    _ = text_polyglot.entities
                if morph:
                    _ = text_polyglot.morphemes
    except Exception as ex:
        # Report every failure as EnvironmentError with the traceback inline.
        raise EnvironmentError(
            str(ex) + 'TracebackError' + traceback.format_exc())