def language(self):
    """Return the language of this text, resolving it if necessary.

    When `hint_language_code` is set, the language is (re)built from that
    code via `Language.from_code`. If no language is stored after that, the
    value of `detected_languages.language` is stored and returned.
    """
    hint = self.hint_language_code
    if hint is not None:
        # An explicit hint always overrides whatever was stored before.
        self.__lang = Language.from_code(hint)

    current = self.__lang
    if current is None:
        # Nothing known yet: fall back to the detected language.
        current = self.detected_languages.language
        self.__lang = current
    return current
def __init__(self, text, lang_code=None, word_tokenizer=None,
             sentiment_weighting=None):
    """Store the text and set up language, tokenizer and sentiment weighting.

    Args:
        text: the text to wrap; must be an instance of `basestring`. A value
            that is not `unicode` is decoded as UTF-8.
        lang_code: optional language code handed to `Language.from_code`.
            Without it, `detected_languages.language` is used.
        word_tokenizer: optional tokenizer. Without it, a `WordTokenizer`
            is created with the locale taken from `self.language.code`.
        sentiment_weighting: optional weighting. Without it,
            `long_text_weighting` is used.

    Raises:
        TypeError: if `text` is not an instance of `basestring`.
    """
    if not isinstance(text, basestring):
        raise TypeError('The `text` argument passed to `__init__(text)` '
                        'must be a unicode string, not {0}'.format(type(text)))

    # Keep the unicode form of the input in both `raw` and `string`.
    decoded = text if isinstance(text, unicode) else text.decode("utf-8")
    self.raw = decoded
    self.string = self.raw

    if lang_code is None:
        self.__lang = self.detected_languages.language
    else:
        self.__lang = Language.from_code(lang_code)

    if word_tokenizer is None:
        word_tokenizer = WordTokenizer(locale=self.language.code)
    self.__word_tokenizer = word_tokenizer

    if sentiment_weighting is None:
        sentiment_weighting = long_text_weighting
    self.__sentiment_weighting = sentiment_weighting
def language(self, value):
    """Set the language from a language code.

    `value` is handed to `Language.from_code` and the result is stored as
    the language of this object.
    """
    resolved = Language.from_code(value)
    self.__lang = resolved
def _polyglot_package_ready(downloader, task, language, language_label):
    """Tell whether `task` supports `language`, insisting its data is installed.

    Args:
        downloader: object whose `is_installed(name)` is queried.
        task: polyglot task name such as 'ner2'.
        language: language code, appended to the task as '<task>.<language>'.
        language_label: text used for the language in the error message.

    Returns:
        False when `language` is not in `get_supported_languages(task)`,
        True when it is and the package is installed.

    Raises:
        EnvironmentError: the task supports the language but
            `downloader.is_installed` reports the package as missing.
    """
    if language not in get_supported_languages(task):
        return False
    if not downloader.is_installed(task + '.' + language):
        raise EnvironmentError(
            'The {0} module for {1} was not found, to install this package,'
            ' run "./install/install_polyglot.py"'.format(task, language_label))
    return True


def _warm_up_polyglot(language, sentiment, entities, morph):
    """Access the selected analyses once on a sample text for `language`."""
    if not (sentiment or entities or morph):
        return
    text_polyglot = Text('Testing and cashing', hint_language_code=language)
    if sentiment:
        _ = text_polyglot.words[0].polarity
        _ = text_polyglot.sentences[0].polarity
    if entities:
        _ = text_polyglot.entities
    if morph:
        _ = text_polyglot.morphemes


def polyglot_default_install():
    """Check the polyglot data packages and warm up the configured languages.

    The data directory is read from `POLIGLOT['path_polyglot_data']`, made
    absolute with `tools.get_abs_path`, created when missing and stored on
    `load.polyglot_path`. For every code in `SERVER['language']` the
    embeddings2, ner2, sentiment2, morph2 and pos2 packages are checked, in
    that order, whenever the task supports the language. Sentiment, entities
    and morphemes are then accessed once on a sample `Text`.

    Raises:
        EnvironmentError: a required package is missing, or any other
            `Exception` occurred; the message is the original error text
            followed by 'TracebackError' and the formatted traceback.
            `KeyboardInterrupt` and `SystemExit` are not wrapped and
            propagate unchanged.
    """
    try:
        # NOTE(review): this rebinds the process-wide `os.path.sep`. Kept
        # because other code may rely on it -- confirm it is still needed.
        os.path.sep = '/'
        polyglot_path = tools.get_abs_path(POLIGLOT['path_polyglot_data'])
        if not os.path.exists(polyglot_path):
            os.makedirs(polyglot_path)
        load.polyglot_path = polyglot_path
        downloader = Downloader(download_dir=polyglot_path)

        for language in SERVER['language']:
            full_name = Language.from_code(language).name

            # The embeddings message also shows the language code.
            _polyglot_package_ready(
                downloader, 'embeddings2', language,
                '{0}({1})'.format(full_name, language))
            entities = _polyglot_package_ready(
                downloader, 'ner2', language, full_name)
            sentiment = _polyglot_package_ready(
                downloader, 'sentiment2', language, full_name)
            morph = _polyglot_package_ready(
                downloader, 'morph2', language, full_name)
            # pos2 is only checked for presence; it has no warm-up step.
            _polyglot_package_ready(downloader, 'pos2', language, full_name)

            # Trigger model loading now rather than on the first request.
            _warm_up_polyglot(language, sentiment, entities, morph)
    except Exception as ex:
        # Only `Exception` is wrapped so Ctrl-C and interpreter shutdown
        # are not disguised as an environment problem.
        raise EnvironmentError(
            str(ex) + 'TracebackError' + traceback.format_exc())