def __init__(self):
    """Create the recognizer, logger and exceptions handler, then log readiness."""
    # Services
    self.__recognizer = sr.Recognizer()
    logger = Logger()
    self.__logger = logger
    self._exceptions_handler = ExceptionsHandler()

    logger.info('SpeechRecognizer was successfully initialized.', __name__)
def __init__(self):
    """Create the logger and exceptions handler, then log readiness."""
    # Services
    logger = Logger()
    self.__logger = logger
    self._exceptions_handler = ExceptionsHandler()

    logger.info('SpellChecker was successfully initialized.', __name__)
class SpeechRecognizer:
    """Records one utterance from a microphone and transcribes it.

    Audio is captured with ``sr.Microphone()`` and transcribed with
    ``recognize_google`` using the ``ru-RU`` language setting.
    """

    def __init__(self):
        """Create the recognizer, logger and exceptions handler, then log readiness."""
        # Services
        self.__recognizer = sr.Recognizer()
        self.__logger = Logger()
        self._exceptions_handler = ExceptionsHandler()

        self.__logger.info('SpeechRecognizer was successfully initialized.', __name__)

    def recognize_speech(self):
        """Listen once and return the recognized text.

        Returns:
            The lower-cased, stripped transcript on success. On failure, the
            message produced by the exceptions handler for the raised error.
            NOTE(review): callers cannot tell an error message from a
            transcript by type alone; both are plain return values.
        """
        # The original wrapped this in ``while True`` although every path
        # returned on the first pass; the dead loop is removed.
        try:
            with sr.Microphone() as source:
                speech = self.__recognizer.listen(source)
        # ``Exception`` rather than ``BaseException`` so that Ctrl-C and
        # interpreter shutdown are not converted into an "error message".
        except Exception as exception:
            error_message = self._exceptions_handler.get_error_message(exception)
            self.__logger.error(error_message, __name__)
            return error_message

        try:
            return self.__recognizer.recognize_google(
                speech, language="ru-RU").lower().strip()
        except Exception as exception:
            error_message = self._exceptions_handler.get_error_message(exception)

            # A timeout is reported as a warning; anything else is an error.
            if isinstance(exception, sr.WaitTimeoutError):
                self.__logger.warning(error_message, __name__)
            else:
                self.__logger.error(error_message, __name__)

            return error_message
def __init__(self):
    """Create the services and the classification data container, then log readiness."""
    # Services
    logger = Logger()
    self.__logger = logger
    self._path_service = PathService()
    self._exceptions_handler = ExceptionsHandler()

    # Data
    self._container = ClassificationDataContainer()
    # Default list of classifier names.
    self._possible_classifiers = ['NBC', 'LogisticRegression', 'KNN']

    logger.info('Classifier was successfully initialized.', __name__)
def __init__(self):
    """Create the services, record the working directory, then log readiness."""
    # Services
    logger = Logger()
    self.__logger = logger
    self._path_service = PathService()
    self._configurator = Configurator()
    self._exceptions_handler = ExceptionsHandler()

    # Data
    self._wd = os.getcwd()
    # Both settings start out unset.
    self._request_url = self.databases_public_keys = None

    logger.info('DatabaseCursor was successfully initialized.', __name__)
def __init__(self):
    """Create the services, load the vector model, then log readiness."""
    # Services
    self._database_cursor = DatabaseCursor()
    logger = Logger()
    self.__logger = logger
    self._exceptions_hanlder = ExceptionsHandler()
    self._lemmatizer = Lemmatizer()
    self._path_service = PathService()
    self._configurator = Configurator()
    self._morph_analyzer = pymorphy2.MorphAnalyzer()

    # Data
    self._vec_model = None

    # The model slot is reset above before the loader is invoked.
    self._load_vec_model()

    logger.info('NgramAnalyzer was successfully initialized.', __name__)
class SpellChecker:
    """Corrects spelling through the Yandex.Speller ``checkText`` web service."""

    def __init__(self):
        """Create the logger and exceptions handler, then log readiness."""
        # Services
        self.__logger = Logger()
        self._exceptions_handler = ExceptionsHandler()

        self.__logger.info('SpellChecker was successfully initialized.', __name__)

    def check_spelling(self, text):
        """Return ``text`` with reported misspellings replaced by their first suggestion.

        Args:
            text: The text to check.

        Returns:
            The corrected text. If the request fails or the service answers
            with something other than a list of corrections, ``text`` is
            returned unchanged.
        """
        self.__logger.info(f'Start text: {text}', __name__)

        try:
            response = requests.get(
                'https://speller.yandex.net/services/spellservice.json/checkText',
                params={'text': text}).json()
        # ``Exception`` rather than ``BaseException`` so Ctrl-C still works.
        except Exception as exception:
            self.__logger.error(
                self._exceptions_handler.get_error_message(exception), __name__)
            return text

        # Iterating a non-list payload (e.g. an error object) used to crash
        # with a TypeError outside of any handler.
        if not isinstance(response, list):
            self.__logger.error('Unexpected response from the spell service.', __name__)
            return text

        for entry in response:
            try:
                misspelled = entry['word']
                suggestions = entry['s']
            except (KeyError, TypeError):
                # Malformed entry: nothing usable to apply.
                continue

            # An entry may carry no suggestions; indexing [0] would raise.
            if not suggestions:
                continue

            text = text.replace(misspelled, suggestions[0])

        self.__logger.info(f'Checked text: {text}', __name__)
        return text
def __init__(self):
    """Create the services, reset the settings, load the public keys, then log readiness."""
    # Services
    logger = Logger()
    self.__logger = logger
    self._path_service = PathService()
    self._exceptions_handler = ExceptionsHandler()

    # Data
    self._config = {}
    self._wd = os.getcwd()
    # Every setting starts out unset until the key loader runs.
    self._path_to_databases = self._request_url = None
    self._vector_model_public_key = self._databases_public_keys = None

    self._load_public_keys()

    logger.info('Configurator was successfully initialized.', __name__)
class DatabaseCursor:
    """Looks up n-gram rows in the SQLite n-gram databases.

    The database file is chosen from the number of spaces in the n-gram
    (``unigrams.db``, ``bigrams.db`` or ``trigrams.db``). When the file is
    missing, the configurator is asked to download it before the lookup is
    abandoned.
    """

    # Database file name keyed by the number of spaces in the n-gram.
    _DATABASE_BY_SPACES = {0: 'unigrams.db', 1: 'bigrams.db', 2: 'trigrams.db'}

    def __init__(self):
        """Create the services, record the working directory, then log readiness."""
        # Services
        self.__logger = Logger()
        self._path_service = PathService()
        self._configurator = Configurator()
        self._exceptions_handler = ExceptionsHandler()

        # Data
        self._wd = os.getcwd()
        self._request_url = None
        self.databases_public_keys = None

        self.__logger.info('DatabaseCursor was successfully initialized.', __name__)

    def _load_config(self):
        """Read ``request_url`` and ``database_public_keys`` from ``database_cursor.json``.

        The file is looked up in the path service's configs directory; an
        error is logged when it does not exist.
        NOTE(review): nothing in this class calls this method.
        """
        path_to_config = os.path.join(self._path_service.path_to_configs, 'database_cursor.json')

        if not os.path.exists(path_to_config):
            self.__logger.error(
                "Can't load config for DatabaseCursor (doesn't exist).", __name__)
            return

        with open(path_to_config, 'r', encoding='utf-8') as file:
            config = json.load(file)

        self._request_url = config['request_url']
        self.databases_public_keys = config['database_public_keys']

    def __update_connection(self, ngram):
        """Open a connection to the database that matches the n-gram length.

        Returns:
            An open ``sqlite3`` connection, or ``None`` when the n-gram has
            more than three words or the database file cannot be obtained.
        """
        database_name = self._DATABASE_BY_SPACES.get(ngram.count(' '))
        if database_name is None:
            # Previously this fell through to a download attempt with a
            # ``None`` path, which crashed inside the configurator.
            self.__logger.error(f'Unsupported ngram: {ngram}', __name__)
            return None

        path_to_db = self._path_service.get_path_to_database(database_name)

        if path_to_db and os.path.exists(path_to_db):
            self.__logger.info(f'Connected to database: {path_to_db}', __name__)
            return sqlite3.connect(path_to_db)

        self.__logger.warning(f'Database lost: {path_to_db}', __name__)
        self.__logger.info('Trying to download database from cloud...', __name__)

        if not path_to_db:
            # presumably the path service yields nothing for a missing file;
            # download into the databases directory in that case.
            path_to_db = os.path.join(self._path_service.path_to_databases, database_name)

        self._configurator.download_database(path_to_db)

        if os.path.exists(path_to_db):
            # Only claim a connection once the file is known to exist.
            self.__logger.info(f'Connected to database: {path_to_db}', __name__)
            return sqlite3.connect(path_to_db)

        self.__logger.fatal("Database doesn't exist.", __name__)
        return None

    def _fetch_row(self, ngram):
        """Run the n-gram lookup and return ``(succeeded, row)``.

        ``succeeded`` is ``False`` when no connection could be opened or the
        query raised; ``row`` is the first matching row or ``None``.
        """
        connection = self.__update_connection(ngram)
        if connection is None:
            return False, None

        # The value is bound as a parameter so that quotes inside the n-gram
        # cannot break (or alter) the statement.
        request = "SELECT * FROM 'Data' WHERE Ngram=?"
        self.__logger.info(f'Request to DB: {request} (Ngram={ngram!r})', __name__)

        try:
            row = connection.cursor().execute(request, (ngram,)).fetchone()
        except Exception as exception:
            self.__logger.error(
                self._exceptions_handler.get_error_message(exception), __name__)
            return False, None
        finally:
            # Close on every path, including failures.
            connection.close()

        self.__logger.info('Request is OK.', __name__)
        return True, row

    def get_entry(self, ngram):
        """Return columns 1 and 2 of the row stored for ``ngram``.

        Returns:
            A ``(row[1], row[2])`` tuple, or ``None`` when the n-gram is not
            stored or the lookup failed.
        """
        succeeded, row = self._fetch_row(ngram)
        if not succeeded:
            return None

        self.__logger.info(f'Received data: {str(row)}', __name__)

        if row:
            return row[1], row[2]
        return None

    def entry_exists(self, ngram):
        """Tell whether ``ngram`` is stored in its database.

        Returns:
            ``True`` or ``False`` for a completed lookup, ``None`` when the
            lookup itself failed.
        """
        succeeded, row = self._fetch_row(ngram)
        if not succeeded:
            return None

        if row:
            self.__logger.info('Entry exists.', __name__)
            return True

        self.__logger.info("Entry doesn't exist.", __name__)
        return False
class NgramAnalyzer:
    """Finds a near synonym of an n-gram that has an entry in the n-gram database."""

    # File name of the word2vec model inside the databases directory.
    _VECTOR_MODEL_NAME = 'ruscorpora_upos_skipgram_300_10_2017.bin.gz'

    # Exact part-of-speech tags that are renamed before use as a word suffix.
    _POS_ALIASES = {'INFN': 'VERB', 'ADVB': 'ADV', 'PRED': 'ADV', 'PRCL': 'PART'}

    # How many synonyms are requested for a unigram.
    _SYNONYMS_COUNT = 10

    def __init__(self):
        """Create the services, load the vector model, then log readiness."""
        # Services
        self._database_cursor = DatabaseCursor()
        self.__logger = Logger()
        # NOTE(review): attribute name keeps its original (misspelled) form.
        self._exceptions_hanlder = ExceptionsHandler()
        self._lemmatizer = Lemmatizer()
        self._path_service = PathService()
        self._configurator = Configurator()
        self._morph_analyzer = pymorphy2.MorphAnalyzer()

        # Data
        self._vec_model = None

        self._load_vec_model()

        self.__logger.info('NgramAnalyzer was successfully initialized.', __name__)

    def _load_vec_model(self):
        """Load the word2vec model, downloading it first when no path is known.

        On any failure an error is logged and ``self._vec_model`` stays
        ``None``; the lookup methods treat that as "no synonyms available".
        """
        if not self._path_service.path_to_vector_model:
            self.__logger.warning("Vector model doesn't exist.", __name__)
            self._configurator.download_vector_model()

            downloaded_path = os.path.join(
                self._path_service.path_to_databases, self._VECTOR_MODEL_NAME)
            self._path_service.set_path_to_vector_model(downloaded_path)

            # Report success only when the file is really there.
            if os.path.exists(downloaded_path):
                self.__logger.info('Vector model was successfully downloaded.', __name__)

        path_to_model = self._path_service.path_to_vector_model

        if not path_to_model or not os.path.exists(path_to_model):
            self.__logger.error("Vector model doesn't exist.", __name__)
            return

        try:
            self._vec_model = gensim.models.KeyedVectors.load_word2vec_format(
                path_to_model, binary=True)
        except Exception as exception:
            # An unreadable file must not abort construction of the analyzer.
            self.__logger.error(
                self._exceptions_hanlder.get_error_message(exception), __name__)

    def _part_of_speech_detect(self, word):
        """Return the part-of-speech tag of ``word`` in the form used as a model suffix.

        Tags starting with ``ADJ`` or ``PRT`` collapse to that prefix, the
        tags in ``_POS_ALIASES`` are renamed, anything else is returned as is.
        Returns ``None`` for an empty word or when no tag was detected.
        """
        if not word:
            return None

        part_of_speech = self._morph_analyzer.parse(word)[0].tag.POS
        if not part_of_speech:
            return part_of_speech

        for prefix in ('ADJ', 'PRT'):
            if part_of_speech.startswith(prefix):
                return prefix

        return self._POS_ALIASES.get(part_of_speech, part_of_speech)

    @staticmethod
    def _detect_ngram_type(ngram):
        """Classify ``ngram`` by its number of spaces.

        Returns ``'unigram'``, ``'bigram'`` or ``'trigram'``; ``None`` for an
        empty value or for more than two spaces.
        """
        if not ngram:
            return None

        return {0: 'unigram', 1: 'bigram', 2: 'trigram'}.get(ngram.count(' '))

    def _nearest_synonyms_find(self, word, topn):
        """Return up to ``topn`` synonyms of ``word`` with the same n-gram type.

        Returns:
            A list of ``{'word': ..., 'cosine proximity': ...}`` dicts, or
            ``None`` when no model is loaded, the arguments are unusable or
            the model lookup raised.
        """
        if self._vec_model is None or not word or topn <= 0:
            return None

        nearest_synonyms = []
        ngram_type = self._detect_ngram_type(word)

        # Detect the tag once; the original parsed the word a second time
        # just to build the suffix.
        part_of_speech = self._part_of_speech_detect(word)
        if part_of_speech:
            word = f'{word}_{part_of_speech}'

        try:
            # Ask for ten times more candidates because many are filtered out.
            for candidate, proximity in self._vec_model.most_similar(
                    positive=[word], topn=topn * 10):
                found_synonym = self._lemmatizer.get_text_initial_form(
                    candidate.split('_')[0])

                if found_synonym and self._detect_ngram_type(found_synonym) == ngram_type:
                    nearest_synonyms.append(
                        {'word': found_synonym, 'cosine proximity': proximity})

                if len(nearest_synonyms) == topn:
                    break
        except Exception as exception:
            self.__logger.warning(
                self._exceptions_hanlder.get_error_message(exception), __name__)
            return None

        return nearest_synonyms

    def relevant_ngram_find(self, ngram):
        """Find the first near synonym of ``ngram`` that has a usable database entry.

        Only unigrams are searched; other n-grams yield the "not found"
        response.

        Returns:
            ``None`` for an empty ``ngram``; otherwise a dict with
            ``'synonym_found'`` and ``'content'`` (``synonym``, ``pos_docs``
            and ``neg_docs`` when a synonym was found).
        """
        if not ngram:
            return None

        self.__logger.info(f'Start ngram: {ngram}', __name__)

        response = {'synonym_found': False, 'content': dict()}

        if self._detect_ngram_type(ngram) != 'unigram':
            return response

        nearest_synonyms = self._nearest_synonyms_find(ngram, self._SYNONYMS_COUNT)
        if not nearest_synonyms:
            return response

        for nearest_synonym in nearest_synonyms:
            data = self._database_cursor.get_entry(nearest_synonym['word'])

            if data and data[0]:
                self.__logger.info(f'Relevant ngram: {nearest_synonym["word"]}', __name__)

                response['synonym_found'] = True
                response['content'] = {
                    'synonym': nearest_synonym['word'],
                    'pos_docs': data[0],
                    'neg_docs': data[1],
                }
                return response

        return response
class Configurator(metaclass=Singleton):
    """Downloads missing n-gram databases and the vector model, and records the outcome.

    The status of every resource (``'exists'``, ``'downloaded'`` or
    ``'error'``) is collected in ``self._config`` and written to
    ``Logs/config.json`` by ``configure_system``.
    """

    # File name of the word2vec model inside the databases directory.
    _VECTOR_MODEL_NAME = 'ruscorpora_upos_skipgram_300_10_2017.bin.gz'

    # Database files checked by ``configure_system``.
    _DATABASE_NAMES = ('unigrams.db', 'bigrams.db', 'trigrams.db')

    def __init__(self):
        """Create the services, reset the settings, load the public keys, then log readiness."""
        # Services
        self.__logger = Logger()
        self._path_service = PathService()
        self._exceptions_handler = ExceptionsHandler()

        # Data
        self._config = dict()
        self._wd = os.getcwd()
        self._path_to_databases = None
        self._request_url = None
        self._vector_model_public_key = None
        self._databases_public_keys = None

        self._load_public_keys()

        self.__logger.info('Configurator was successfully initialized.', __name__)

    def _load_public_keys(self):
        """Read the request URL and the public keys from ``configurator.json``.

        The file is looked up in the path service's configs directory; an
        error is logged when it does not exist.
        """
        path_to_config = os.path.join(self._path_service.path_to_configs, 'configurator.json')

        if not os.path.exists(path_to_config):
            self.__logger.error(
                "Can't load config for Configrurator (doesn't exist).", __name__)
            return

        with open(path_to_config, 'r', encoding='utf-8') as file:
            config = json.load(file)

        self._request_url = config['request_url']
        self._vector_model_public_key = config['vector_model_public_key']
        self._databases_public_keys = config['databases_public_keys']

    def _download_file(self, public_key, destination):
        """Resolve the download link for ``public_key`` and save the file to ``destination``.

        Raises whatever the HTTP layer, the JSON decoding or the file write
        raises. The destination is opened only after the payload has been
        received, so a failed download never leaves an empty file behind.
        """
        link_response = requests.get(self._request_url, params={'public_key': public_key})
        link_response.raise_for_status()
        download_url = link_response.json()["href"]

        file_response = requests.get(download_url)
        # Do not store an HTTP error page as if it were the requested file.
        file_response.raise_for_status()

        with open(destination, 'wb') as target_file:
            target_file.write(file_response.content)

    def download_database(self, path_to_db):
        """Download the database whose file name is the last component of ``path_to_db``.

        The outcome is recorded in ``self._config`` under the database file
        name, the same key ``configure_system`` uses for ``'exists'``.
        Failures are logged, not raised.
        """
        if not path_to_db:
            self.__logger.error("Can't download database: path is empty.", __name__)
            return

        database_name = os.path.split(path_to_db)[1]
        if not database_name:
            return

        try:
            self._download_file(self._databases_public_keys[database_name], path_to_db)
        except Exception as exception:
            self.__logger.error(
                self._exceptions_handler.get_error_message(exception), __name__)
            self._config[database_name] = 'error'
        else:
            self._config[database_name] = 'downloaded'

    def download_vector_model(self):
        """Download the vector model into the databases directory.

        The path service is pointed at the target file before the download
        starts. Failures are logged and recorded in ``self._config``, not
        raised.
        """
        self._path_service.set_path_to_vector_model(
            os.path.join(self._path_service.path_to_databases, self._VECTOR_MODEL_NAME))

        try:
            self._download_file(
                self._vector_model_public_key, self._path_service.path_to_vector_model)
        except Exception as exception:
            self.__logger.error(
                self._exceptions_handler.get_error_message(exception), __name__)
            self._config[self._VECTOR_MODEL_NAME] = 'error'
        else:
            self._config[self._VECTOR_MODEL_NAME] = 'downloaded'

    def configure_system(self):
        """Make sure the databases and the vector model are present, then write the report."""
        self._config['datetime'] = str(datetime.now())

        for database in self._DATABASE_NAMES:
            path_to_database = self._path_service.get_path_to_database(database)

            if path_to_database and os.path.exists(path_to_database):
                self._config[database] = 'exists'
                continue

            self.__logger.warning(f'Database not found: {database}', __name__)
            self.download_database(
                os.path.join(self._path_service.path_to_databases, database))

        path_to_model = self._path_service.path_to_vector_model
        if path_to_model and os.path.exists(path_to_model):
            self._config[self._VECTOR_MODEL_NAME] = 'exists'
        else:
            self.__logger.warning('Vector model not found.', __name__)
            self.download_vector_model()

        self._create_config()

    def _create_config(self):
        """Write ``self._config`` as JSON to ``Logs/config.json`` (relative to the working directory)."""
        # The report directory may not exist yet on a fresh checkout.
        os.makedirs('Logs', exist_ok=True)

        with open(os.path.join('Logs', 'config.json'), 'w', encoding='utf-8') as config:
            json.dump(self._config, config, indent=4)
class Classifier:
    """Predicts a tonality from n-gram weights with up to three pre-trained models.

    One model exists per level (unigrams, bigrams, trigrams). Each model is
    fed the weight of its own level plus the weights of all lower levels, and
    the final tonality is chosen from the per-level predictions.
    """

    # Levels in the order their weights appear in a model's input row.
    _NGRAM_LEVELS = ('unigrams', 'bigrams', 'trigrams')

    def __init__(self):
        """Create the services and the classification data container, then log readiness."""
        # Services
        self.__logger = Logger()
        self._path_service = PathService()
        self._exceptions_handler = ExceptionsHandler()

        # Data
        self._container = ClassificationDataContainer()
        self._possible_classifiers = ['NBC', 'LogisticRegression', 'KNN']

        self.__logger.info('Classifier was successfully initialized.', __name__)

    def _load_config(self):
        """Read ``possible_classifiers`` from ``classifier.json`` in the configs directory.

        An error is logged when the file does not exist.
        NOTE(review): nothing in this class calls this method.
        """
        path_to_config = os.path.join(self._path_service.path_to_configs, 'classifier.json')

        if not os.path.exists(path_to_config):
            self.__logger.error("Can't load Classifier configuration.", __name__)
            return

        with open(path_to_config, 'r', encoding='utf-8') as file:
            config = json.load(file)

        self._possible_classifiers = config['possible_classifiers']

    def customize(self, unigrams_weight, bigrams_weight, trigrams_weight, classifier_name='NBC'):
        """Reset the container, store the weights and load the models that will be needed.

        Args:
            unigrams_weight: Weight for the unigram level; a falsy value
                means the unigram model is not loaded.
            bigrams_weight: Same for bigrams.
            trigrams_weight: Same for trigrams.
            classifier_name: One of ``self._possible_classifiers``; any other
                value falls back to ``'NBC'`` and logs an error.

        Model loading failures are logged as fatal, not raised.
        """
        self._container.clear()

        if classifier_name in self._possible_classifiers:
            self._container.classifiers['name'] = classifier_name
        else:
            self._container.classifiers['name'] = 'NBC'
            self.__logger.error('Got unknown classifier, set default (NBC).', __name__)

        self._container.weights['unigrams'] = unigrams_weight
        self._container.weights['bigrams'] = bigrams_weight
        self._container.weights['trigrams'] = trigrams_weight

        try:
            for level in self._NGRAM_LEVELS:
                if self._container.weights[level]:
                    self._container.classifiers[level] = joblib.load(
                        self._path_service.get_path_to_model(
                            level, self._container.classifiers['name']))

            self.__logger.info('Models were successfully loaded.', __name__)
            self.__logger.info('Classifier was successfully configured.', __name__)
        # ``Exception`` rather than ``BaseException`` so Ctrl-C still works.
        except Exception as exception:
            self.__logger.fatal(
                self._exceptions_handler.get_error_message(exception), __name__)

    def _predict_level(self, level):
        """Predict tonality and probability with the model of ``level`` and store both.

        The model receives one sample holding the weights of ``level`` and of
        every lower level, always as a 2-D ``[[...]]`` row. The unigram model
        previously received the bare weight, unlike the other two levels.
        (assumes each weight is a single number, as the bigram and trigram
        rows imply — TODO confirm)
        """
        used_levels = self._NGRAM_LEVELS[:self._NGRAM_LEVELS.index(level) + 1]
        sample = [[self._container.weights[name] for name in used_levels]]
        model = self._container.classifiers[level]

        self._container.tonalities[level] = model.predict(sample)[0]
        # The reported probability is the highest class probability.
        self._container.probabilities[level] = max(model.predict_proba(sample)[0])

        label = level.capitalize()
        self.__logger.info(
            f'{label} tonal: {self._container.tonalities[level]}', __name__)
        self.__logger.info(
            f'{label} probability: {self._container.probabilities[level]}', __name__)

    def _predict_tonal_by_unigrams(self):
        """Predict with the unigram model from the unigram weight."""
        self._predict_level('unigrams')

    def _predict_tonal_by_unigrams_bigrams(self):
        """Predict with the bigram model from the unigram and bigram weights."""
        self._predict_level('bigrams')

    def _predict_tonal_by_unigrams_bigrams_trigrams(self):
        """Predict with the trigram model from all three weights."""
        self._predict_level('trigrams')

    def _predict_intermediate_tonalities(self):
        """Run the prediction of every level with a truthy weight, each in its own thread."""
        predictors = {
            'unigrams': self._predict_tonal_by_unigrams,
            'bigrams': self._predict_tonal_by_unigrams_bigrams,
            'trigrams': self._predict_tonal_by_unigrams_bigrams_trigrams,
        }

        threads = [
            Thread(target=predictors[level], args=())
            for level in self._NGRAM_LEVELS
            if self._container.weights[level]
        ]

        for thread in threads:
            thread.start()

        # ``join`` already blocks until the thread ends; the previous
        # ``is_alive``/``sleep(0.1)`` polling only added latency.
        for thread in threads:
            thread.join()

    def _set_final(self, tonal, probability):
        """Store the final tonality and its probability in the container."""
        self._container.tonalities['final'] = tonal
        self._container.probabilities['final'] = probability

    def _select_final_tonal(self):
        """Choose the final tonality from the per-level predictions.

        * All three levels predicted: the first agreeing pair (checked in the
          order unigrams/bigrams, unigrams/trigrams, bigrams/trigrams) wins
          with the higher probability of that pair. If no pair agrees, the
          most confident level wins.
        * Unigrams and bigrams only: an agreement wins with the higher
          probability; on disagreement the more confident level wins, with
          unigrams winning a tie.
        * Unigrams only: the unigram prediction is final.
        * Otherwise nothing is stored.
        """
        tonalities = self._container.tonalities
        probabilities = self._container.probabilities

        if tonalities['unigrams'] and tonalities['bigrams'] and tonalities['trigrams']:
            pairs = (('unigrams', 'bigrams'),
                     ('unigrams', 'trigrams'),
                     ('bigrams', 'trigrams'))

            for first, second in pairs:
                if tonalities[first] == tonalities[second]:
                    self._set_final(
                        tonalities[first],
                        max(probabilities[first], probabilities[second]))
                    break
            else:
                # No two levels agree (possible with more than two classes):
                # previously no final value was set at all.
                best = max(self._NGRAM_LEVELS, key=lambda level: probabilities[level])
                self._set_final(tonalities[best], probabilities[best])

        elif tonalities['unigrams'] and tonalities['bigrams']:
            if tonalities['unigrams'] == tonalities['bigrams']:
                self._set_final(
                    tonalities['unigrams'],
                    max(probabilities['bigrams'], probabilities['unigrams']))
            elif probabilities['unigrams'] >= probabilities['bigrams']:
                self._set_final(tonalities['unigrams'], probabilities['unigrams'])
            else:
                self._set_final(tonalities['bigrams'], probabilities['bigrams'])

        elif tonalities['unigrams']:
            self._set_final(tonalities['unigrams'], probabilities['unigrams'])

    def predict_tonal(self):
        """Run all configured predictions and return ``(final tonality, final probability)``."""
        self._predict_intermediate_tonalities()
        self._select_final_tonal()

        self.__logger.info(
            f'Final tonal: {self._container.tonalities["final"]}', __name__)
        self.__logger.info(
            f'Final probability: {self._container.probabilities["final"]}', __name__)

        return self._container.tonalities['final'], self._container.probabilities['final']