def __init__(self):
        """Create the services this object uses and log that initialization succeeded."""
        # Services
        self.__logger = Logger()
        self._exceptions_handler = ExceptionsHandler()

        # The second argument passes the current module name to the logger.
        self.__logger.info('SpellChecker was successfully initialized.',
                           __name__)
    def __init__(self):
        """Create the logger and register the exception classes of every known category."""
        self.__logger = Logger()

        # Failures raised by the `requests` library.
        self._request_exceptions = [
            requests.ConnectionError,
            requests.HTTPError,
            requests.TooManyRedirects,
            requests.Timeout,
            requests.TooManyRedirects,
            requests.RequestException,
            requests.ConnectTimeout,
            requests.ReadTimeout,
        ]

        # Built-in errors.
        self._system_errors = [
            KeyError,
            AttributeError,
            IndexError,
            ZeroDivisionError,
            SystemError,
            ValueError,
            AssertionError,
        ]

        # File-system errors.
        self._file_errors = [FileExistsError, FileNotFoundError]

        # Errors raised by the `sqlite3` module.
        self._database_errors = [
            sqlite3.Error,
            sqlite3.DataError,
            sqlite3.ProgrammingError,
            sqlite3.DatabaseError,
            sqlite3.NotSupportedError,
            sqlite3.IntegrityError,
            sqlite3.InterfaceError,
            sqlite3.InternalError,
            sqlite3.OperationalError,
        ]

        # Errors raised by the speech recognition package imported as `sr`.
        self._speech_recognizer_errors = [
            sr.RequestError,
            sr.UnknownValueError,
            sr.WaitTimeoutError,
            sr.RequestError,
        ]

        self.__logger.info('ExceptionsHandler was successfully initialized.', __name__)
예제 #3
0
    def __init__(self):
        """Create the services, load the stop words and log that initialization succeeded."""
        # Services
        self._spell_checker = SpellChecker()
        self.__logger = Logger()
        self._path_service = PathService()
        self._morph_analyzer = pymorphy2.MorphAnalyzer()

        # Data
        # NOTE(review): _read_stop_words presumably relies on the services
        # created above - confirm before reordering these statements.
        self._stop_words = self._read_stop_words()
        # Part-of-speech tags stored for later filtering.
        # NOTE(review): these look like pymorphy2 tags for numerals, pronouns
        # and prepositions - confirm against the pymorphy2 tag set.
        self._parts_of_speech_to_remove = ['NUMR', 'NPRO', 'PREP']

        self.__logger.info('Lemmatizer was successfully initialized.',
                           __name__)
예제 #4
0
    def __init__(self):
        """Create the services, count the dataset documents and log that initialization succeeded."""
        # Services
        self._database_cursor = DatabaseCursor()
        self._ngram_analyzer = NgramAnalyzer()
        self.__logger = Logger()
        self._path_service = PathService()

        # Data
        # Starts empty; _count_all_docs() is called right below.
        # NOTE(review): presumably that call fills this dict - confirm.
        self._docs_count = dict()

        self._count_all_docs()

        self.__logger.info('TextWeightCounter was successfully initialized.',
                           __name__)
class SpellChecker:
    """Correct misspelled words of a text with the speller.yandex.net web service."""

    _SERVICE_URL = 'https://speller.yandex.net/services/spellservice.json/checkText'

    # Seconds to wait for the service; without a timeout `requests.get` may
    # block forever on an unresponsive server.
    _REQUEST_TIMEOUT = 10

    def __init__(self):
        # Services
        self.__logger = Logger()
        self._exceptions_handler = ExceptionsHandler()

        self.__logger.info('SpellChecker was successfully initialized.',
                           __name__)

    @staticmethod
    def _apply_corrections(text, corrections):
        """Return ``text`` with every corrected word replaced by its first suggestion.

        ``corrections`` is the decoded service response: an iterable of dicts
        with the misspelled ``'word'`` and a list of suggestions ``'s'``.
        Entries that are not dicts, or that carry no word or no suggestion,
        are skipped instead of raising.
        """
        for correction in corrections:
            if not isinstance(correction, dict):
                continue

            misspelled = correction.get('word')
            suggestions = correction.get('s')

            # An empty suggestion list would make `suggestions[0]` fail, and an
            # empty `misspelled` would make `str.replace` insert the suggestion
            # between all characters.
            if misspelled and suggestions:
                text = text.replace(misspelled, suggestions[0])

        return text

    def check_spelling(self, text):
        """Return ``text`` with spelling mistakes corrected.

        The original text is returned unchanged when the service cannot be
        reached or its response cannot be processed; the failure is logged.
        """
        self.__logger.info(f'Start text: {text}', __name__)

        try:
            response = requests.get(
                self._SERVICE_URL,
                params={'text': text},
                timeout=self._REQUEST_TIMEOUT).json()

            checked_text = self._apply_corrections(text, response)

        # `Exception` instead of `BaseException`, so KeyboardInterrupt and
        # SystemExit are no longer swallowed here.
        except Exception as exception:
            self.__logger.error(
                self._exceptions_handler.get_error_message(exception),
                __name__)
            return text

        self.__logger.info(f'Checked text: {checked_text}', __name__)
        return checked_text
class FileReader(QWidget):
    """Widget that asks the user for a file and returns its decoded text."""

    def __init__(self):
        super().__init__()
        self.__logger = Logger()
        self.__file_dialog = QFileDialog()

        self.__logger.info('FileReader was successfully initialized.', __name__)

    def _detect_encoding(self, filename):
        """Return the encoding chardet reports for the raw bytes of ``filename``."""
        with open(filename, 'rb') as source:
            raw_content = source.read()

        detection = chardet.detect(raw_content)
        encoding = detection['encoding']

        self.__logger.info(f"File's encoding: {encoding}", __name__)

        return encoding

    def get_file_content(self):
        """Show the open-file dialog and return the chosen file's text.

        Returns None when no file was chosen or when anything fails; a failure
        is logged as an error.
        """
        try:
            selection = self.__file_dialog.getOpenFileName(self, 'Open file', '/home')
            filename = selection[0]
            self.__logger.info(f'Filename: {filename}', __name__)

            if not filename:
                return None

            # The encoding is detected before the file is opened in text mode.
            detected_encoding = self._detect_encoding(filename)

            with open(filename, 'r', encoding=detected_encoding) as text_file:
                return text_file.read()

        except BaseException as exception:
            self.__logger.error(str(exception), __name__)

        return None
    def __init__(self):
        """Create the services, load the vector model and log that initialization succeeded."""
        # Services
        self._database_cursor = DatabaseCursor()
        self.__logger = Logger()
        # NOTE: the attribute name is misspelled ("hanlder"); it is kept as is
        # because other code may refer to it by this name.
        self._exceptions_hanlder = ExceptionsHandler()
        self._lemmatizer = Lemmatizer()
        self._path_service = PathService()
        self._configurator = Configurator()
        self._morph_analyzer = pymorphy2.MorphAnalyzer()

        # Data
        # Starts as None; _load_vec_model() is called right below.
        # NOTE(review): presumably that call assigns the model - confirm.
        self._vec_model = None

        self._load_vec_model()

        self.__logger.info('NgramAnalyzer was successfully initialized.', __name__)
예제 #8
0
    def __init__(self):
        """Declare every path/config attribute, run configure() and log success."""
        # Services
        self.__logger = Logger()

        # Data
        # Working directory at construction time.
        self._wd = os.getcwd()
        # All remaining attributes start as None; configure() is called below.
        # NOTE(review): presumably configure() fills them in - confirm.
        self.path_to_databases = None
        self.path_to_configs = None
        self._valid_classifiers = None
        self._valid_model_types = None
        self._valid_databases = None
        self._valid_test_results_modes = None
        self._valid_datasets = None
        self.path_to_stop_words = None
        self._path_to_main_directory = None
        self.path_to_vector_model = None
        self._path_to_classifier_models = None
        self._path_to_test_results = None

        self.configure()
        self.__logger.info('PathService was successfully configured.', __name__)
class NgramAnalyzer:
    """Find a database-known synonym of an n-gram with the help of a word2vec model."""

    def __init__(self):
        # Services
        self._database_cursor = DatabaseCursor()
        self.__logger = Logger()
        # NOTE: the attribute name is misspelled ("hanlder"); kept for
        # backward compatibility.
        self._exceptions_hanlder = ExceptionsHandler()
        self._lemmatizer = Lemmatizer()
        self._path_service = PathService()
        self._configurator = Configurator()
        self._morph_analyzer = pymorphy2.MorphAnalyzer()

        # Data
        self._vec_model = None

        self._load_vec_model()

        self.__logger.info('NgramAnalyzer was successfully initialized.', __name__)

    def _load_vec_model(self):
        """Load the vector model into ``self._vec_model``, downloading it first if its path is unknown."""
        if not self._path_service.path_to_vector_model:
            self.__logger.warning("Vector model doesn't exist.", __name__)

            self._configurator.download_vector_model()
            self._path_service.set_path_to_vector_model(os.path.join(self._path_service.path_to_databases,
                                                                     'ruscorpora_upos_skipgram_300_10_2017.bin.gz'))
            self.__logger.info('Vector model was successfully downloaded.', __name__)

        if self._path_service.path_to_vector_model:
            self._vec_model = gensim.models.KeyedVectors.load_word2vec_format(self._path_service.path_to_vector_model,
                                                                              binary=True)
        else:
            self.__logger.error("Vector model doesn't exist.", __name__)

    def _part_of_speech_detect(self, word):
        """Return the part-of-speech tag of ``word`` as used in vector model keys.

        The tag of the first morphological parse is remapped: tags starting
        with ``ADJ``/``PRT`` collapse to ``'ADJ'``/``'PRT'``, ``INFN`` becomes
        ``'VERB'``, ``ADVB``/``PRED`` become ``'ADV'`` and ``PRCL`` becomes
        ``'PART'``. Any other tag is returned unchanged. Returns None for an
        empty word or when the parse carries no tag.
        """
        if not word:
            return

        part_of_speech = self._morph_analyzer.parse(word)[0].tag.POS

        if part_of_speech:
            if re.match(r'ADJ', part_of_speech):
                return 'ADJ'

            elif re.match(r'PRT', part_of_speech):
                return 'PRT'

            elif part_of_speech == 'INFN':
                return 'VERB'

            elif part_of_speech in ('ADVB', 'PRED'):
                return 'ADV'

            elif part_of_speech == 'PRCL':
                return 'PART'

        return part_of_speech

    @staticmethod
    def _detect_ngram_type(ngram):
        """Return 'unigram', 'bigram' or 'trigram' by the number of spaces in ``ngram``.

        Returns None for an empty n-gram or one with more than two spaces.
        """
        if not ngram:
            return

        spaces_count = ngram.count(' ')

        if spaces_count == 0:
            return 'unigram'

        elif spaces_count == 1:
            return 'bigram'

        elif spaces_count == 2:
            return 'trigram'

    def _nearest_synonyms_find(self, word, topn):
        """Return up to ``topn`` nearest neighbours of ``word`` in the vector model.

        Each item is ``{'word': <lemmatized neighbour>, 'cosine proximity': <score>}``.
        Only neighbours whose lemmatized form has the same n-gram type as
        ``word`` are kept. Returns None when the model is not loaded, the
        arguments are empty/non-positive, or the model lookup fails.
        """
        if not self._vec_model or not word or topn <= 0:
            return

        nearest_synonyms = list()
        part_of_speech = self._part_of_speech_detect(word)
        ngram_type = self._detect_ngram_type(word)

        if part_of_speech:
            # Model keys have the form '<word>_<POS>'; the already detected
            # tag is reused instead of parsing the word a second time.
            word = word + '_%s' % part_of_speech

        try:
            # Ask for ten times more candidates than needed because many of
            # them are dropped by the lemmatizer / n-gram type filter below.
            for synonym in self._vec_model.most_similar(positive=[word], topn=topn * 10):
                found_synonym = self._lemmatizer.get_text_initial_form(synonym[0].split('_')[0])

                if found_synonym and self._detect_ngram_type(found_synonym) == ngram_type:
                    nearest_synonyms.append({'word': found_synonym,
                                             'cosine proximity': synonym[1]})

                if len(nearest_synonyms) == topn:
                    break

        # `Exception` instead of `BaseException`, so KeyboardInterrupt and
        # SystemExit are no longer swallowed here.
        except Exception as exception:
            self.__logger.warning(self._exceptions_hanlder.get_error_message(exception), __name__)
            return

        return nearest_synonyms

    def relevant_ngram_find(self, ngram):
        """Look for a synonym of ``ngram`` that has an entry in the database.

        Returns None for an empty n-gram. Otherwise returns a dict
        ``{'synonym_found': bool, 'content': dict}``; when a synonym is found,
        ``content`` holds ``'synonym'``, ``'pos_docs'`` and ``'neg_docs'``.
        Only unigrams are searched; other n-gram types always yield
        ``synonym_found == False``.
        """
        if not ngram:
            return

        self.__logger.info(f'Start ngram: {ngram}', __name__)

        response = {'synonym_found': False, 'content': dict()}

        if self._detect_ngram_type(ngram) == 'unigram':
            synonyms_count = 10
            nearest_synonyms = self._nearest_synonyms_find(ngram, synonyms_count)

            if not nearest_synonyms:
                return response

            for nearest_synonym in nearest_synonyms:
                data = self._database_cursor.get_entry(nearest_synonym['word'])

                if data and data[0]:
                    self.__logger.info(f'Relevant ngram: {nearest_synonym["word"]}', __name__)

                    response['synonym_found'] = True

                    response['content']['synonym'] = nearest_synonym['word']
                    response['content']['pos_docs'] = data[0]
                    response['content']['neg_docs'] = data[1]

                    return response

        return response
예제 #10
0
class Lemmatizer:
    """Bring a text to its initial form.

    The pipeline spell-checks the text, drops words with latin letters and
    non-alphabetic words, replaces every word by its normal form and removes
    stop words and words of the parts of speech listed in
    ``_parts_of_speech_to_remove``.
    """

    def __init__(self):
        # Services
        self._spell_checker = SpellChecker()
        self.__logger = Logger()
        self._path_service = PathService()
        self._morph_analyzer = pymorphy2.MorphAnalyzer()

        # Data
        self._stop_words = self._read_stop_words()
        self._parts_of_speech_to_remove = ['NUMR', 'NPRO', 'PREP']

        self.__logger.info('Lemmatizer was successfully initialized.',
                           __name__)

    @staticmethod
    def _contains_latin_letter(word):
        """Return True when at least one character of ``word`` is an ASCII letter.

        An empty word yields False.
        """
        if not word:
            return False

        # `any`, not `all`: a word mixing alphabets still *contains* a latin letter.
        return any(char in ascii_letters for char in word)

    def _detect_part_of_speech(self, word):
        """Return the POS tag of the first morphological parse of ``word`` (None for an empty word)."""
        if word:
            return self._morph_analyzer.parse(word)[0].tag.POS

    def _is_stop_word(self, word):
        """Return True when ``word``, padded with spaces, occurs in any stop-word group.

        Returns None (after a warning) for an empty word.
        """
        if not word:
            self.__logger.warning('Got empty word.', __name__)
            return

        # NOTE(review): the padding suggests each group is a space-delimited
        # string of stop words - confirm against stop_words.json.
        padded_word = f' {word} '

        # `or {}` keeps the check working when no stop words were loaded.
        return any(padded_word in stop_words
                   for stop_words in (self._stop_words or {}).values())

    def _remove_words_without_emotions(self, text):
        """Return ``text`` without stop words and words of the removable parts of speech.

        Returns None (after a warning) for an empty text.
        """
        if not text:
            self.__logger.warning('Got empty text.', __name__)
            return

        cleaned_text = [
            word for word in re.findall(r'\w+', text)
            if self._detect_part_of_speech(word) not in self._parts_of_speech_to_remove
            and not self._is_stop_word(word)
        ]

        return ' '.join(cleaned_text).strip()

    def _read_stop_words(self):
        """Return the stop words loaded from the JSON file known to the path service.

        Returns an empty dict when the path is unset or the file is missing,
        so later lookups work without stop words instead of crashing.
        """
        path_to_stop_words = self._path_service.path_to_stop_words

        # The path may be None; `os.path.exists(None)` raises TypeError.
        if path_to_stop_words and os.path.exists(path_to_stop_words):
            with open(path_to_stop_words, 'r', encoding='utf-8') as file:
                return json.load(file)

        return {}

    def _delete_words_contains_latin_letters(self, text):
        """Spell-check the lower-cased ``text`` and keep only alphabetic words without latin letters.

        Returns None (after a warning) when no word survives.
        """
        checked_text = self._spell_checker.check_spelling(text.lower())

        text = ' '.join([
            word for word in re.findall(r'\w+', checked_text)
            if not self._contains_latin_letter(word) and word.isalpha()
        ]).strip()

        if text:
            return text
        else:
            self.__logger.warning(
                'All words in document contain latin letters or all words are digits.',
                __name__)

    def _get_text_normal_form(self, text):
        """Return ``text`` with every word replaced by the normal form of its first parse."""
        # Words are joined by exactly one space (the extra `+ ' '` of the
        # earlier version produced double spaces).
        return ' '.join(self._morph_analyzer.parse(word)[0].normal_form
                        for word in re.findall(r'\w+', text)).strip()

    def get_text_initial_form(self, text):
        """Return the lemmatized ``text`` or None when nothing is left of it.

        None is also returned (after a warning) for an empty input.
        """
        if not text:
            self.__logger.warning('Got empty text.', __name__)
            return

        self.__logger.info(f'Start text: {text}', __name__)

        transformations = [
            self._delete_words_contains_latin_letters,
            self._get_text_normal_form, self._remove_words_without_emotions
        ]

        for transformation in transformations:
            text = transformation(text)

            # Stop as soon as a stage leaves no text to process.
            if not text:
                return

        self.__logger.info(f'Lemmatized text: {text}', __name__)
        return text
예제 #11
0
class TextWeightCounter:
    """Compute the average delta TF-IDF weight of a document's n-grams."""

    def __init__(self):
        # Services
        self._database_cursor = DatabaseCursor()
        self._ngram_analyzer = NgramAnalyzer()
        self.__logger = Logger()
        self._path_service = PathService()

        # Data
        self._docs_count = dict()

        self._count_all_docs()

        self.__logger.info('TextWeightCounter was successfully initialized.',
                           __name__)

    def _count_docs_in_dataset(self, mode):
        """Count the documents of the ``dataset_with_<mode>.csv`` dataset.

        Returns ``(all_docs, positive_docs, negative_docs)``. The returned
        ``negative_docs`` includes a constant shift of 10000 while ``all_docs``
        does not.
        """
        path_to_dataset = self._path_service.get_path_to_dataset(
            f'dataset_with_{mode}.csv')

        # NOTE(review): the shift stays inside the returned negative count -
        # looks like deliberate smoothing of the weights, confirm.
        negative_docs_shift = 10000

        positive_docs = 0
        negative_docs = negative_docs_shift

        with open(path_to_dataset, 'r', encoding='utf-8') as file:
            for row in csv.reader(file):
                # Rows are ';'-separated; the comma-split cells are glued back
                # together before splitting on ';'.
                fields = ''.join(row).split(';')

                # Blank or malformed rows have no label column - skip them.
                if len(fields) < 2:
                    continue

                if fields[1] == 'positive':
                    positive_docs += 1
                else:
                    negative_docs += 1

        return positive_docs + negative_docs - negative_docs_shift, positive_docs, negative_docs

    def _count_all_docs(self):
        """Fill ``self._docs_count`` with the document counts of every n-gram dataset."""
        for mode in ('unigrams', 'bigrams', 'trigrams'):
            all_docs, positive_docs, negative_docs = self._count_docs_in_dataset(mode)

            self._docs_count[mode] = {
                'all_docs': all_docs,
                'positive_docs': positive_docs,
                'negative_docs': negative_docs,
            }

    @staticmethod
    def _detect_ngram_type(ngram):
        """Return 'unigram', 'bigram' or 'trigram' by the number of spaces in ``ngram`` (None otherwise)."""
        return {0: 'unigram', 1: 'bigram', 2: 'trigram'}.get(ngram.count(' '))

    def _delta_tf_idf(self, ngram_type, pos_docs_word, neg_docs_word):
        """Return log10((negative_docs * pos_docs_word) / (positive_docs * neg_docs_word)).

        Returns 0 when either per-word count or the denominator is zero, where
        the logarithm or the division is undefined.
        """
        if not (pos_docs_word and neg_docs_word):
            return 0

        docs_count = self._docs_count[ngram_type + 's']
        denominator = docs_count['positive_docs'] * neg_docs_word

        if not denominator:
            return 0

        return math.log10(
            (docs_count['negative_docs'] * pos_docs_word) / denominator)

    def _count_ngram_weight(self, ngram):
        """Return the delta TF-IDF weight of ``ngram``.

        The counts come from the n-gram's own database entry or, for unigrams
        without an entry, from the entry of a synonym. Returns 0 when no counts
        are available or they are degenerate.
        """
        self.__logger.info(f'Ngram: {ngram}', __name__)

        ngram_type = self._detect_ngram_type(ngram)

        self.__logger.info(f'Ngram_type: {ngram_type}', __name__)

        # More than three words: there are no document counts for this type.
        if ngram_type is None:
            return 0

        if self._database_cursor.entry_exists(ngram):
            pos_docs_word, neg_docs_word = self._database_cursor.get_entry(
                ngram)

            return self._delta_tf_idf(ngram_type, pos_docs_word, neg_docs_word)

        response = self._ngram_analyzer.relevant_ngram_find(ngram)

        # `relevant_ngram_find` returns None for an empty n-gram.
        if not response or not response['synonym_found'] or ngram_type != 'unigram':
            return 0

        pos_docs_word = response['content']['pos_docs']
        neg_docs_word = response['content']['neg_docs']

        # A synonym with counts (1, 1) gets no weight.
        if pos_docs_word == 1 and neg_docs_word == 1:
            return 0

        return self._delta_tf_idf(ngram_type, pos_docs_word, neg_docs_word)

    def _count_weight(self, ngrams, min_important, label):
        """Return the average weight of the distinct n-grams that have a non-zero weight.

        Each distinct n-gram contributes its weight multiplied by its number of
        occurrences. Returns None when fewer than ``min_important`` distinct
        n-grams (or none at all) have a non-zero weight. ``label`` prefixes the
        log message.
        """
        # Occurrences per distinct n-gram, in order of first appearance.
        occurrences = {}
        for ngram in ngrams:
            occurrences[ngram] = occurrences.get(ngram, 0) + 1

        total_weight = 0
        important_count = 0

        for ngram, count in occurrences.items():
            ngram_weight = count * self._count_ngram_weight(ngram)
            total_weight += ngram_weight

            if ngram_weight:
                important_count += 1

        if important_count and important_count >= min_important:
            average_weight = total_weight / important_count

            self.__logger.info(f'{label} weight: {average_weight}', __name__)

            return average_weight

        return None

    def count_weight_by_unigrams(self, unigrams):
        """Return the average unigram weight, or None when fewer than ~60% of the unigrams are weighted."""
        if not unigrams:
            return None

        return self._count_weight(unigrams, round(len(unigrams) * 0.6), 'Unigrams')

    def count_weight_by_bigrams(self, bigrams):
        """Return the average bigram weight, or None when fewer than half of the bigrams are weighted."""
        if not bigrams:
            return None

        return self._count_weight(bigrams, len(bigrams) // 2, 'Bigrams')

    def count_weight_by_trigrams(self, trigrams):
        """Return the average trigram weight, or None when fewer than ~40% of the trigrams are weighted."""
        if not trigrams:
            return None

        return self._count_weight(trigrams, round(len(trigrams) * 0.4), 'Trigrams')
예제 #12
0
class PathService(metaclass=Singleton):
    """Locate the project's directories and hand out paths to its files."""

    def __init__(self):
        # Services
        self.__logger = Logger()

        # Data
        self._wd = os.getcwd()
        self.path_to_databases = None
        self.path_to_configs = None
        self._valid_classifiers = None
        self._valid_model_types = None
        self._valid_databases = None
        self._valid_test_results_modes = None
        self._valid_datasets = None
        self.path_to_stop_words = None
        self._path_to_main_directory = None
        self.path_to_vector_model = None
        self._path_to_classifier_models = None
        self._path_to_test_results = None

        self.configure()
        self.__logger.info('PathService was successfully configured.', __name__)

    def _find_main_directory(self):
        """Find the 'Python' main directory by walking up from the working directory.

        Sets ``_path_to_main_directory``, ``path_to_configs`` and
        ``path_to_databases`` and restores the original working directory.
        Terminates the process when the directory is not found within the
        maximum nesting level.
        """
        max_nesting_level = 5
        nesting_level = 0

        while not os.getcwd().endswith('Python'):
            if os.getcwd().endswith('Databases'):
                # 'Databases' is a sibling of the main directory.
                os.chdir(os.path.join('..', 'Python'))
                break
            else:
                os.chdir('..')

            nesting_level += 1

            if nesting_level > max_nesting_level:
                self.__logger.fatal("Can't find main directory (exceeded maximum nesting level).", __name__)
                exit(-1)

        self._path_to_main_directory = os.getcwd()
        self.path_to_configs = os.path.join(self._path_to_main_directory, 'Services', 'Configs')
        # Resolved relative to the main directory, which is the current one here.
        self.path_to_databases = os.path.abspath(os.path.join('..', 'Databases'))

        os.chdir(self._wd)

    def _check_paths_existing(self):
        """Verify the configured paths.

        A missing configs, databases or classifier models directory terminates
        the process. A missing vector model, stop-words file or reports
        directory is logged and the corresponding attribute is reset to None.
        """
        if not os.path.exists(self.path_to_configs):
            self.__logger.fatal("Directory with config files doesn't exist.", __name__)
            exit(-1)

        elif not os.path.exists(self.path_to_databases):
            self.__logger.fatal("Directory with databases doesn't exist.", __name__)
            exit(-1)

        elif not os.path.exists(self._path_to_classifier_models):
            self.__logger.fatal("Directory with classifier models doesn't exist.", __name__)
            exit(-1)

        if not os.path.exists(self.path_to_vector_model):
            self.path_to_vector_model = None
            self.__logger.error("Vector model doesn't exist.", __name__)

        if not os.path.exists(self.path_to_stop_words):
            self.path_to_stop_words = None
            self.__logger.error("File with stop-words doesn't exist.", __name__)

        if not os.path.exists(self._path_to_test_results):
            self._path_to_test_results = None
            self.__logger.warning("Directory with tests reports doesn't exist.", __name__)

    def _load_config(self):
        """Load the lists of valid names from 'path_service.json' in the configs directory.

        Raises FileNotFoundError (after logging an error) when the file is missing.
        """
        path_to_config = os.path.join(self.path_to_configs, 'path_service.json')

        if not os.path.exists(path_to_config):
            # Only logged here; the `open` below raises the actual error.
            self.__logger.error("Can't find config-file for PathService.", __name__)

        with open(path_to_config, 'r', encoding='utf-8') as file:
            config = json.load(file)

        self._valid_classifiers = config['valid_classifiers']
        self._valid_databases = config['valid_databases']
        self._valid_datasets = config['valid_datasets']
        self._valid_test_results_modes = config['valid_test_results_modes']
        self._valid_model_types = config['valid_model_types']

    def configure(self):
        """Discover the directories, load the config and build and verify all paths."""
        self._find_main_directory()
        self._load_config()

        self.path_to_vector_model = os.path.join(self.path_to_databases, 'ruscorpora_upos_skipgram_300_10_2017.bin.gz')
        self.path_to_stop_words = os.path.join(self._path_to_main_directory, 'Services', 'Lemmatizer',
                                               'stop_words.json')
        self._path_to_classifier_models = os.path.join(self.path_to_databases, 'Models')
        self._path_to_test_results = os.path.join(self._path_to_main_directory, 'Tests', 'System', 'Reports')

        self._check_paths_existing()

    def get_path_to_test_results(self, mode='classifier', classifier_name='NBC'):
        """Return the directory for test reports of the given ``mode``.

        An unknown ``classifier_name`` falls back to 'NBC'. An unknown ``mode``
        yields the base reports directory. Returns None when the reports
        directory doesn't exist.
        """
        if classifier_name not in self._valid_classifiers:
            self.__logger.warning('Got incorrect classifier name.', __name__)
            classifier_name = 'NBC'

        normalized_mode = mode.lower().strip()

        # Validate the *mode* here; the earlier version compared the
        # classifier name with the list of valid modes.
        if mode not in self._valid_test_results_modes and \
                normalized_mode not in self._valid_test_results_modes:
            self.__logger.warning('Got incorrect mode.', __name__)
            return self._path_to_test_results

        # The attribute is reset to None when the directory is missing, and
        # `os.path.join(None, ...)` would raise TypeError.
        if self._path_to_test_results is None:
            self.__logger.warning("Directory with tests reports doesn't exist.", __name__)
            return None

        if normalized_mode == 'vec_model':
            return os.path.join(self._path_to_test_results, 'VectorModel')

        elif normalized_mode == 'classifier_main':
            return os.path.join(self._path_to_test_results, '..', '..', 'MainReports', 'Classifier', classifier_name)

        elif normalized_mode == 'classifier':
            return self._path_to_test_results

    def get_path_to_model(self, model='unigrams', classifier_name='NBC'):
        """Return the path to ``model_<model>.pkl`` of the classifier.

        Unknown names fall back to 'NBC' / 'unigrams'. Returns None (after an
        error is logged) when the classifier's directory doesn't exist.
        """
        if classifier_name not in self._valid_classifiers:
            self.__logger.warning('Got incorrect classifier name.', __name__)
            classifier_name = 'NBC'

        if model not in self._valid_model_types:
            self.__logger.warning('Got incorrect model type.', __name__)
            model = 'unigrams'

        path_to_models = os.path.join(self._path_to_classifier_models, classifier_name)

        if not os.path.exists(path_to_models):
            self.__logger.error("Required model wasn't found.", __name__)
            return None

        return os.path.join(path_to_models, f'model_{model}.pkl')

    def get_path_to_database(self, database_name='unigrams.db'):
        """Return the path to the database; an unknown name falls back to 'unigrams.db'."""
        if database_name not in self._valid_databases:
            self.__logger.warning('Got incorrect database name.', __name__)
            database_name = 'unigrams.db'

        return os.path.join(self.path_to_databases, database_name)

    def get_path_to_dataset(self, dataset):
        """Return the path to the dataset; an unknown name falls back to 'dataset_with_unigrams.csv'."""
        if dataset not in self._valid_datasets:
            self.__logger.warning('Got incorrect dataset name.', __name__)
            dataset = 'dataset_with_unigrams.csv'

        return os.path.join(self.path_to_databases, dataset)

    def set_path_to_vector_model(self, path_to_vector_model):
        """Remember ``path_to_vector_model`` as the location of the vector model."""
        self.path_to_vector_model = path_to_vector_model
class ExceptionsHandler:
    """Translate exceptions of known categories into short log messages."""

    def __init__(self):
        self.__logger = Logger()

        # The classes are listed directly; no exception needs to be
        # instantiated just to obtain its type.
        self._request_exceptions = [requests.ConnectionError, requests.HTTPError,
                                    requests.TooManyRedirects, requests.Timeout,
                                    requests.RequestException, requests.ConnectTimeout,
                                    requests.ReadTimeout]

        self._system_errors = [KeyError, AttributeError, IndexError,
                               ZeroDivisionError, SystemError, ValueError,
                               AssertionError]

        self._file_errors = [FileExistsError, FileNotFoundError]

        self._database_errors = [sqlite3.Error, sqlite3.DataError,
                                 sqlite3.ProgrammingError, sqlite3.DatabaseError,
                                 sqlite3.NotSupportedError, sqlite3.IntegrityError,
                                 sqlite3.InterfaceError, sqlite3.InternalError,
                                 sqlite3.OperationalError]

        self._speech_recognizer_errors = [sr.RequestError, sr.UnknownValueError,
                                          sr.WaitTimeoutError]

        self.__logger.info('ExceptionsHandler was successfully initialized.', __name__)

    @staticmethod
    def _handle_system_exception(exception):
        """Return the message for a built-in error (None for an unlisted type)."""
        if isinstance(exception, KeyError):
            return 'Key error occurred.'
        elif isinstance(exception, AttributeError):
            return 'AttributeError occurred.'
        elif isinstance(exception, IndexError):
            return 'Index error occurred.'
        elif isinstance(exception, ZeroDivisionError):
            return 'ZeroDivisionError occurred.'
        elif isinstance(exception, SystemError):
            return 'SystemError occurred.'
        elif isinstance(exception, ValueError):
            return 'ValueError occurred.'
        elif isinstance(exception, AssertionError):
            return 'Assertion error occurred.'

    @staticmethod
    def _handle_file_exception(exception):
        """Return the message for a file error, including the file name (None for an unlisted type)."""
        if isinstance(exception, FileNotFoundError):
            return f'FileNotFoundError occurred ({str(exception.filename)}).'
        elif isinstance(exception, FileExistsError):
            return f'FileExistsError occurred ({str(exception.filename)}).'

    @staticmethod
    def _handle_database_exception(exception):
        """Return the message for a sqlite3 error (None for an unlisted type).

        Subclasses are tested before their parents: every class below
        ``DatabaseError`` derives from it, so the parent check must come last
        among them or the more specific branches become unreachable.
        """
        if isinstance(exception, sqlite3.OperationalError):
            return 'sqlite3.Operational occurred.'
        elif isinstance(exception, sqlite3.ProgrammingError):
            return 'sqlite3.ProgrammingError occurred.'
        elif isinstance(exception, sqlite3.InternalError):
            return 'sqlite3.InternalError occurred.'
        elif isinstance(exception, sqlite3.InterfaceError):
            return 'sqlite3.InterfaceError occurred.'
        elif isinstance(exception, sqlite3.IntegrityError):
            return 'sqlite3.IntegrityError occurred.'
        elif isinstance(exception, sqlite3.NotSupportedError):
            return 'sqlite3.NotSupportedError occurred.'
        elif isinstance(exception, sqlite3.DataError):
            return 'sqlite3.DataError occurred.'
        elif isinstance(exception, sqlite3.DatabaseError):
            return 'sqlite3.DatabaseError occurred.'
        elif isinstance(exception, sqlite3.Error):
            return 'sqlite3.Error occurred.'

    @staticmethod
    def _handle_request_exception(exception):
        """Return the message for a ``requests`` error.

        ``ConnectTimeout`` and ``ReadTimeout`` are tested first because they
        derive from ``ConnectionError``/``Timeout`` and would otherwise never
        reach their own branches.
        """
        if isinstance(exception, requests.ConnectTimeout):
            return 'ConnectTimeout (requests.ConnectTimeout).'
        elif isinstance(exception, requests.ReadTimeout):
            return 'ReadTimeout (requests.ReadTimeout).'
        elif isinstance(exception, requests.ConnectionError):
            return 'Problems with connection (requests.ConnectionError).'
        elif isinstance(exception, requests.HTTPError):
            return 'HHTP request return unsuccessful status code (requests.HTTPError).'
        elif isinstance(exception, requests.Timeout):
            return 'Request times out (requests.Timeout).'
        elif isinstance(exception, requests.TooManyRedirects):
            return 'Request exceeds the configured number of maximum redirections (requests.TooManyRedirects).'
        else:
            return 'Request exception (requests.RequestException).'

    @staticmethod
    def _handle_speech_recognizer_exception(exception):
        """Return the message for a speech recognition error (None for an unlisted type)."""
        if isinstance(exception, sr.WaitTimeoutError):
            return 'speech_recognition.WaitTimeoutError occurred.'
        elif isinstance(exception, sr.UnknownValueError):
            return 'Unknown value (speech_recognoition.UnknownValueError).'
        elif isinstance(exception, sr.RequestError):
            return 'speech_recognition.RequestError occurred.'

    def get_error_message(self, exception):
        """Return a short message describing ``exception``.

        The exact type of the exception selects the category handler;
        exceptions of unregistered types are rendered with ``str()``.
        """
        exception_type = type(exception)

        if exception_type in self._system_errors:
            return self._handle_system_exception(exception)

        elif exception_type in self._file_errors:
            return self._handle_file_exception(exception)

        # Database errors were registered but never dispatched before.
        elif exception_type in self._database_errors:
            return self._handle_database_exception(exception)

        elif exception_type in self._request_exceptions:
            return self._handle_request_exception(exception)

        elif exception_type in self._speech_recognizer_errors:
            return self._handle_speech_recognizer_exception(exception)

        return str(exception)
예제 #14
0
    def __init__(self):
        """Initialize the base class, create the logger and the file dialog, and log success."""
        super().__init__()
        self.__logger = Logger()
        self.__file_dialog = QFileDialog()

        self.__logger.info('FileReader was successfully initialized.', __name__)
    def __init__(self):
        """Create the logger and log that initialization succeeded."""
        self.__logger = Logger()

        self.__logger.info('DocumentPreparer was successfully initialized.',
                           __name__)
class DocumentPreparer:
    """Split a text into unigrams, bigrams and trigrams."""

    def __init__(self):
        self.__logger = Logger()

        self.__logger.info('DocumentPreparer was successfully initialized.',
                           __name__)

    def split_into_unigrams(self, text):
        """Return the list of words (``\\w+`` runs) of ``text``; None, with a warning, for an empty text."""
        if not text:
            self.__logger.warning('Got empty text.', __name__)
            return None

        return re.findall(r'\w+', text)

    def split_into_bigrams(self, text):
        """Return the bigrams of ``text``, the two words of each sorted alphabetically.

        Returns None for an empty text (warning) or a text with fewer than two
        words (info message).
        """
        if not text:
            self.__logger.warning('Got empty text.', __name__)
            return None

        words = self.split_into_unigrams(text)

        if len(words) < 2:
            self.__logger.info("Text doesn't contain enough words.", __name__)
            return None

        # Pair every word with its right-hand neighbour.
        return [' '.join(sorted(pair)) for pair in zip(words, words[1:])]

    def split_into_trigrams(self, text):
        """Return the trigrams of ``text``, the three words of each sorted alphabetically.

        Returns None for an empty text (warning) or a text with fewer than
        three words (info message).
        """
        if not text:
            self.__logger.warning('Got empty text.', __name__)
            return None

        words = self.split_into_unigrams(text)

        if len(words) < 3:
            self.__logger.info("Text doesn't contain enough words.", __name__)
            return None

        # Group every word with its two right-hand neighbours.
        return [' '.join(sorted(triple))
                for triple in zip(words, words[1:], words[2:])]