class DatabaseCursor:
    """Look up n-gram entries in the unigram/bigram/trigram SQLite databases.

    The database file is selected by the number of spaces in the n-gram.
    Every lookup opens its own connection and closes it before returning.
    """

    # Database file name keyed by the number of spaces in the n-gram.
    _DATABASE_BY_SPACES = {
        0: 'unigrams.db',
        1: 'bigrams.db',
        2: 'trigrams.db',
    }

    def __init__(self):
        # Services
        self.__logger = Logger()
        self._path_service = PathService()
        self._configurator = Configurator()
        self._exceptions_handler = ExceptionsHandler()

        # Data
        self._wd = os.getcwd()
        self._request_url = None
        self.databases_public_keys = None

        self.__logger.info('DatabaseCursor was successfully initialized.',
                           __name__)

    def _load_config(self):
        """Read 'database_cursor.json' from the configs directory.

        Fills ``_request_url`` and ``databases_public_keys``. A missing file
        is only logged as an error; both attributes are then left unchanged.

        Raises:
            KeyError: if the file exists but lacks 'request_url' or
                'database_public_keys'.
        """
        path_to_config = os.path.join(self._path_service.path_to_configs,
                                      'database_cursor.json')

        if not os.path.exists(path_to_config):
            self.__logger.error(
                "Can't load config for DatabaseCursor (doesn't exist).",
                __name__)
            return

        with open(path_to_config, 'r', encoding='utf-8') as file:
            config = json.load(file)

        self._request_url = config['request_url']
        self.databases_public_keys = config['database_public_keys']

    def __update_connection(self, ngram):
        """Open a connection to the database that stores ``ngram``.

        If the database file is missing, the configurator is asked to
        download it first.

        Returns:
            An open ``sqlite3.Connection``, or None when the n-gram has more
            than three words or the database is still missing after the
            download attempt.
        """
        database_name = self._DATABASE_BY_SPACES.get(ngram.count(' '))

        if database_name is None:
            # No database exists for 4+ word n-grams; there is nothing to
            # download, so stop before touching the filesystem.
            self.__logger.error(
                f'No database for n-gram (too many words): {ngram}', __name__)
            return None

        path_to_db = self._path_service.get_path_to_database(database_name)

        if not os.path.exists(path_to_db):
            self.__logger.warning(f'Database lost: {path_to_db}', __name__)
            self.__logger.info('Trying to download database from cloud...',
                               __name__)

            self._configurator.download_database(path_to_db)

            if not os.path.exists(path_to_db):
                self.__logger.fatal("Database doesn't exist.", __name__)
                return None

        connection = sqlite3.connect(path_to_db)
        self.__logger.info(f'Connected to database: {path_to_db}', __name__)

        return connection

    def __fetch_row(self, ngram):
        """Fetch the first 'Data' row whose Ngram column equals ``ngram``.

        Returns:
            ``(succeeded, row)``. ``succeeded`` is False when no connection
            could be opened or the query failed; ``row`` is None when the
            lookup failed or no row matched.
        """
        connection = self.__update_connection(ngram)

        if connection is None:
            return False, None

        # The n-gram is bound as a parameter, never interpolated into the SQL
        # text: quotes inside it can neither break nor alter the statement.
        request = "SELECT * FROM 'Data' WHERE Ngram=?"
        self.__logger.info(f'Request to DB: {request} (Ngram={ngram!r})',
                           __name__)

        try:
            cursor = connection.cursor()
            cursor.execute(request, (ngram,))
            row = cursor.fetchone()

        except sqlite3.Error as exception:
            self.__logger.error(
                self._exceptions_handler.get_error_message(exception),
                __name__)
            return False, None

        finally:
            # Runs on success and on failure, so the connection never leaks.
            connection.close()

        self.__logger.info('Request is OK.', __name__)
        return True, row

    def get_entry(self, ngram):
        """Return columns 1 and 2 of the row stored for ``ngram``.

        Returns:
            A ``(row[1], row[2])`` tuple, or None when the n-gram is not in
            the database or the lookup failed.
        """
        succeeded, row = self.__fetch_row(ngram)

        if not succeeded:
            return None

        self.__logger.info(f'Received data: {str(row)}', __name__)

        if row:
            return row[1], row[2]

        return None

    def entry_exists(self, ngram):
        """Tell whether ``ngram`` has a row in its database.

        Returns:
            True if a row exists, False if it does not, None when the lookup
            itself failed (no database available or SQL error).
        """
        succeeded, row = self.__fetch_row(ngram)

        if not succeeded:
            return None

        if row:
            self.__logger.info('Entry exists.', __name__)
            return True

        self.__logger.info("Entry doesn't exist.", __name__)
        return False
class PathService(metaclass=Singleton):
    """Resolve the filesystem locations used by the project.

    On construction it locates the main ('Python') directory, reads
    'path_service.json' and checks that the derived paths exist.
    """

    def __init__(self):
        # Services
        self.__logger = Logger()

        # Data
        self._wd = os.getcwd()
        self.path_to_databases = None
        self.path_to_configs = None
        self._valid_classifiers = None
        self._valid_model_types = None
        self._valid_databases = None
        self._valid_test_results_modes = None
        self._valid_datasets = None
        self.path_to_stop_words = None
        self._path_to_main_directory = None
        self.path_to_vector_model = None
        self._path_to_classifier_models = None
        self._path_to_test_results = None

        self.configure()
        self.__logger.info('PathService was successfully configured.', __name__)

    def _find_main_directory(self):
        """Locate the main directory by walking up from the current directory.

        The search stops at the first directory whose path ends with 'Python'.
        A directory ending with 'Databases' is treated as a sibling of the main
        directory. The process working directory is only read, never changed.
        Exits the process with status -1 after more than five upward steps.
        """
        max_nesting_level = 5
        nesting_level = 0
        current_directory = os.getcwd()

        while not current_directory.endswith('Python'):
            if current_directory.endswith('Databases'):
                current_directory = os.path.join(os.path.dirname(current_directory), 'Python')
                break

            # dirname() of a filesystem root is the root itself, so the level
            # counter below is what terminates a fruitless search.
            current_directory = os.path.dirname(current_directory)
            nesting_level += 1

            if nesting_level > max_nesting_level:
                self.__logger.fatal("Can't find main directory (exceeded maximum nesting level).", __name__)
                exit(-1)

        self._path_to_main_directory = current_directory
        self.path_to_configs = os.path.join(self._path_to_main_directory, 'Services', 'Configs')
        self.path_to_databases = os.path.abspath(
            os.path.join(os.path.dirname(self._path_to_main_directory), 'Databases'))

    def _check_paths_existing(self):
        """Verify the configured paths.

        A missing configs, databases or classifier-models directory is fatal
        (exit status -1). A missing vector model, stop-words file or reports
        directory is only logged and the matching attribute is reset to None.
        """
        required_directories = (
            (self.path_to_configs, "Directory with config files doesn't exist."),
            (self.path_to_databases, "Directory with databases doesn't exist."),
            (self._path_to_classifier_models, "Directory with classifier models doesn't exist."),
        )

        for path, message in required_directories:
            if not os.path.exists(path):
                self.__logger.fatal(message, __name__)
                exit(-1)

        if not os.path.exists(self.path_to_vector_model):
            self.path_to_vector_model = None
            self.__logger.error("Vector model doesn't exist.", __name__)

        if not os.path.exists(self.path_to_stop_words):
            self.path_to_stop_words = None
            self.__logger.error("File with stop-words doesn't exist.", __name__)

        if not os.path.exists(self._path_to_test_results):
            self._path_to_test_results = None
            self.__logger.warning("Directory with tests reports doesn't exist.", __name__)

    def _load_config(self):
        """Load the lists of valid names from 'path_service.json'.

        Every getter validates its argument against these lists, so a missing
        file is fatal: it is logged and the process exits with status -1
        instead of failing later on the open() call.

        Raises:
            KeyError: if the file lacks one of the expected keys.
        """
        path_to_config = os.path.join(self.path_to_configs, 'path_service.json')

        if not os.path.exists(path_to_config):
            self.__logger.fatal("Can't find config-file for PathService.", __name__)
            exit(-1)

        with open(path_to_config, 'r', encoding='utf-8') as file:
            config = json.load(file)

        self._valid_classifiers = config['valid_classifiers']
        self._valid_databases = config['valid_databases']
        self._valid_datasets = config['valid_datasets']
        self._valid_test_results_modes = config['valid_test_results_modes']
        self._valid_model_types = config['valid_model_types']

    def configure(self):
        """Find the main directory, load the config and derive all paths."""
        self._find_main_directory()
        self._load_config()

        self.path_to_vector_model = os.path.join(self.path_to_databases, 'ruscorpora_upos_skipgram_300_10_2017.bin.gz')
        self.path_to_stop_words = os.path.join(self._path_to_main_directory, 'Services', 'Lemmatizer',
                                               'stop_words.json')
        self._path_to_classifier_models = os.path.join(self.path_to_databases, 'Models')
        self._path_to_test_results = os.path.join(self._path_to_main_directory, 'Tests', 'System', 'Reports')

        self._check_paths_existing()

    def get_path_to_test_results(self, mode='classifier', classifier_name='NBC'):
        """Return the reports directory for the given mode.

        Args:
            mode: 'vec_model', 'classifier_main' or 'classifier'. It must be
                listed in the configured valid modes; case and surrounding
                whitespace are ignored when selecting the branch.
            classifier_name: classifier whose main report directory is wanted;
                an unknown name falls back to 'NBC'.

        Returns:
            The requested path. The base reports directory is returned for an
            unknown mode. None is returned when the reports directory does not
            exist or the mode matches no branch.
        """
        if classifier_name not in self._valid_classifiers:
            self.__logger.warning('Got incorrect classifier name.', __name__)
            classifier_name = 'NBC'

        # Validate the mode itself, not the classifier name.
        if mode not in self._valid_test_results_modes:
            self.__logger.warning('Got incorrect mode.', __name__)
            return self._path_to_test_results

        if self._path_to_test_results is None:
            # The reports directory was not found during configuration, so
            # there is no base path to join onto.
            return None

        normalized_mode = mode.lower().strip()

        if normalized_mode == 'vec_model':
            return os.path.join(self._path_to_test_results, 'VectorModel')

        if normalized_mode == 'classifier_main':
            return os.path.join(self._path_to_test_results, '..', '..', 'MainReports', 'Classifier', classifier_name)

        if normalized_mode == 'classifier':
            return self._path_to_test_results

        return None

    def get_path_to_model(self, model='unigrams', classifier_name='NBC'):
        """Return the path to 'model_<model>.pkl' of the given classifier.

        Unknown classifier names fall back to 'NBC' and unknown model types to
        'unigrams'. Only the classifier's directory is checked for existence,
        not the model file itself.

        Returns:
            The model path, or None if the classifier directory is missing.
        """
        if classifier_name not in self._valid_classifiers:
            self.__logger.warning('Got incorrect classifier name.', __name__)
            classifier_name = 'NBC'

        if model not in self._valid_model_types:
            self.__logger.warning('Got incorrect model type.', __name__)
            model = 'unigrams'

        path_to_models = os.path.join(self._path_to_classifier_models, classifier_name)

        if not os.path.exists(path_to_models):
            self.__logger.error("Required model wasn't found.", __name__)
            return None

        return os.path.join(path_to_models, f'model_{model}.pkl')

    def get_path_to_database(self, database_name='unigrams.db'):
        """Return the path to a database file; unknown names fall back to 'unigrams.db'."""
        if database_name not in self._valid_databases:
            self.__logger.warning('Got incorrect database name.', __name__)
            database_name = 'unigrams.db'

        return os.path.join(self.path_to_databases, database_name)

    def get_path_to_dataset(self, dataset):
        """Return the path to a dataset file; unknown names fall back to 'dataset_with_unigrams.csv'."""
        if dataset not in self._valid_datasets:
            self.__logger.warning('Got incorrect dataset name.', __name__)
            dataset = 'dataset_with_unigrams.csv'

        return os.path.join(self.path_to_databases, dataset)

    def set_path_to_vector_model(self, path_to_vector_model):
        """Override the path to the vector model (the path is not checked)."""
        self.path_to_vector_model = path_to_vector_model
# Example #3
# 0
class MainWindow(QWidget):
    """Main window of the demo GUI: text input, helper buttons and answer label.

    Sizes, coordinates, fonts and colours come from 'demo.json', which holds
    a 'windows' and a 'darwin' section. Every platform other than Windows
    uses the 'darwin' section.
    """

    # Stylesheet shared by all push buttons; filled with the hover, !hover and
    # pressed colours (in that order).
    _BUTTON_STYLE_TEMPLATE = """
                             QPushButton:hover { background-color: %s }
                             QPushButton:!hover { background-color: %s }
                             QPushButton:pressed { background-color: %s; }
                         """

    # Answer label stylesheet per tonal.
    _ANSWER_LABEL_STYLES = {
        'positive': 'QLabel {color:rgba(0, 200, 100, 255)}',
        'negative': 'QLabel {color:rgba(255, 56, 20, 255)}',
    }

    # Answer label position per platform and tonal. QWidget.move() takes
    # integers, so the former 193.5 is stored truncated.
    _ANSWER_LABEL_POSITIONS = {
        'windows': {'positive': (193, 180), 'negative': (180, 180)},
        'darwin': {'positive': (230, 210), 'negative': (225, 210)},
    }

    def __init__(self):
        super().__init__()
        self.os = platform.system().lower()

        # Services
        self._speech_recognizer = SpeechRecognizer()
        self._file_reader = FileReader()
        self.__logger = Logger()
        self._path_service = PathService()
        self._text_tonal_analyzer = TextTonalAnalyzer('NBC')

        self._config = None
        self._load_config()

        # GUI Elements
        self.line_edit = QLineEdit(self)
        self.answer_label = QLabel(self)
        self.voice_button = QPushButton(self)
        self.answer_button = QPushButton(self)
        self.file_dialog_button = QPushButton(self)
        self.delete_button = QPushButton(self)
        self.message_box = QMessageBox()

    def _load_config(self):
        """Load the platform section of 'demo.json' into ``_config``.

        Exits the process with status -1 if the file does not exist.
        """
        path_to_config = os.path.join(self._path_service.path_to_configs, 'demo.json')

        if not os.path.exists(path_to_config):
            self.__logger.fatal("Config for GUI doesn't exist.", __name__)
            exit(-1)

        with open(path_to_config, 'r', encoding='utf-8') as file:
            full_config = json.load(file)

        platform_key = 'windows' if self.os == 'windows' else 'darwin'
        self._config = full_config[platform_key]

    def _configure_main_window(self):
        """Apply the configuration to the window and to every widget."""
        self._set_base_params()

        self._configure_line_edit()
        self._configure_answer_button()
        self.configure_voice_button()
        self._configure_delete_button()
        self._configure_file_dialog_button()
        self._configure_answer_label()

        self.__logger.info('Main window was successfully configured.', __name__)

    def _set_base_params(self):
        """Set the fixed window size and the background colour."""
        self.setFixedSize(*self._config['size'])
        self.setStyleSheet('QWidget { background-color: %s }' % self._config['background-color'])

    def _configure_line_edit(self):
        """Set up the text input; pressing Return triggers the analysis."""
        settings = self._config['line-edit']

        self.line_edit.setToolTip('Enter the text here')
        self.line_edit.returnPressed.connect(self._answer_button_clicked)

        self.line_edit.resize(*settings['size'])
        self.line_edit.setStyleSheet('QWidget { background-color: %s }' % settings['background-color'])
        self.line_edit.move(*settings['coordinates'])
        self.line_edit.setFont(QFont(*settings['font']))

    def _apply_button_config(self, button, config_key):
        """Apply size, font, position and state colours from ``_config[config_key]``."""
        settings = self._config[config_key]
        colors = settings['background-color']

        button.resize(*settings['size'])
        button.setFont(QFont(*settings['font']))
        button.move(*settings['coordinates'])
        button.setStyleSheet(self._BUTTON_STYLE_TEMPLATE % (colors['hover'],
                                                            colors['!hover'],
                                                            colors['pressed']))

    def _configure_answer_button(self):
        """Set up the button that starts the analysis."""
        self.answer_button.clicked.connect(self._answer_button_clicked)
        self.answer_button.setText('Start')
        self.answer_button.setToolTip('Push to count tonal')

        self._apply_button_config(self.answer_button, 'answer-button')

    def configure_voice_button(self):
        """Set up the button that fills the text box from speech."""
        self.voice_button.setText('🎙')
        self.voice_button.setToolTip('Push to enter the text by speech')
        self.voice_button.clicked.connect(self._voice_button_clicked)

        self._apply_button_config(self.voice_button, 'voice-button')

    def _configure_delete_button(self):
        """Set up the button that clears the text box and the answer."""
        self.delete_button.setText('✗')
        self.delete_button.setToolTip('Push to clear text box')
        self.delete_button.clicked.connect(self._delete_button_clicked)

        self._apply_button_config(self.delete_button, 'delete-button')

    def _configure_file_dialog_button(self):
        """Set up the button that loads the text from a file."""
        self.file_dialog_button.setText('📂')
        self.file_dialog_button.setToolTip('Push to open file')
        self.file_dialog_button.clicked.connect(self._file_dialog_button_clicked)

        self._apply_button_config(self.file_dialog_button, 'file-dialog-button')

    def _configure_answer_label(self):
        """Place and size the label that shows the result."""
        settings = self._config['answer-label']

        self.answer_label.move(*settings['coordinates'])
        self.answer_label.setFont(QFont(*settings['font']))
        self.answer_label.resize(*settings['size'])

    def launch(self):
        """Configure the window and show it."""
        self.setWindowIcon(QIcon('icon.ico'))
        self.setWindowTitle('Sentiment Analyser')

        self._configure_main_window()
        self.show()

        self.__logger.info('Main window was successfully launched.', __name__)

    def _delete_button_clicked(self):
        """Clear the text box and the answer label."""
        self.line_edit.clear()
        self.answer_label.clear()

    def _voice_button_clicked(self):
        """Recognise speech and put the recognised text into the text box.

        While the recogniser reports 'Unknown value' the user is offered a
        retry. The status strings 'Internet connection lost' and
        'No microphone' are shown as errors and are never written into the
        text box, whether they come from the first attempt or from a retry.
        """
        self.message_box.question(self, 'Speak', 'You can start speeking.', QMessageBox.Ok)

        speech_text = self._speech_recognizer.recognize_speech()

        while speech_text == 'Unknown value':
            try_again = self.message_box.question(self, 'Error', 'Unknown value\n Try again?',
                                                  QMessageBox.Yes | QMessageBox.No)
            if try_again != QMessageBox.Yes:
                # The user gave up: leave the text box untouched.
                return

            speech_text = self._speech_recognizer.recognize_speech()

        if speech_text == 'Internet connection lost':
            self.message_box.question(self, 'Error', 'Internet connection lost', QMessageBox.Ok)
            return

        if speech_text == 'No microphone':
            self.message_box.question(self, 'Error', 'Microphone was disconnected', QMessageBox.Ok)
            return

        self.line_edit.setText(speech_text)

    def _file_dialog_button_clicked(self):
        """Put the content returned by the file reader into the text box."""
        file_content = self._file_reader.get_file_content()

        if file_content:
            self.line_edit.setText(file_content)
        else:
            self.__logger.warning('Empty file.', __name__)

    def _answer_button_clicked(self):
        """Analyse the entered text and show the tonal and its probability."""
        self._text_tonal_analyzer.detect_tonal(self.line_edit.text())
        tonal = self._text_tonal_analyzer.tonal
        probability = self._text_tonal_analyzer.probability

        # Same platform fallback as _load_config: everything that is not
        # Windows uses the 'darwin' layout.
        platform_key = 'windows' if self.os == 'windows' else 'darwin'

        # Any tonal other than 'positive'/'negative' keeps the current style
        # and position.
        if tonal in self._ANSWER_LABEL_STYLES:
            self.answer_label.setStyleSheet(self._ANSWER_LABEL_STYLES[tonal])
            self.answer_label.move(*self._ANSWER_LABEL_POSITIONS[platform_key][tonal])

        self.answer_label.setToolTip('Tonal and probability')

        if probability:
            self.answer_label.setText(tonal.capitalize() + '\n' +
                                      str(round(probability * 100, 3)) + '%')
        else:
            self.answer_label.setText(tonal.capitalize())
# Example #4
# 0
class Classifier:
    """Predict a text's tonal from its unigram, bigram and trigram weights.

    One model per n-gram level is loaded by ``customize``. ``predict_tonal``
    runs the loaded models and combines their answers into a final tonal and
    probability.
    """

    # N-gram levels in the order their weights appear in a feature vector.
    _MODEL_TYPES = ('unigrams', 'bigrams', 'trigrams')

    def __init__(self):
        # Services
        self.__logger = Logger()
        self._path_service = PathService()
        self._exceptions_handler = ExceptionsHandler()

        # Data
        self._container = ClassificationDataContainer()
        self._possible_classifiers = ['NBC', 'LogisticRegression', 'KNN']

        self.__logger.info('Classifier was successfully initialized.',
                           __name__)

    def _load_config(self):
        """Replace the list of possible classifiers with the one in 'classifier.json'.

        A missing file is logged as an error and the current list is kept.
        """
        path_to_config = os.path.join(self._path_service.path_to_configs,
                                      'classifier.json')

        if not os.path.exists(path_to_config):
            self.__logger.error("Can't load Classifier configuration.",
                                __name__)
            return

        with open(path_to_config, 'r', encoding='utf-8') as file:
            config = json.load(file)

        self._possible_classifiers = config['possible_classifiers']

    def customize(self,
                  unigrams_weight,
                  bigrams_weight,
                  trigrams_weight,
                  classifier_name='NBC'):
        """Store the weights and load the models needed to classify them.

        Args:
            unigrams_weight: unigram weight of the text; a falsy value (None
                or 0) means the unigram model is neither loaded nor used.
            bigrams_weight: same, for bigrams.
            trigrams_weight: same, for trigrams.
            classifier_name: one of the possible classifiers; an unknown name
                is logged and replaced by 'NBC'.

        Errors raised while loading a model are logged as fatal and are not
        propagated.
        """
        self._container.clear()

        if classifier_name in self._possible_classifiers:
            self._container.classifiers['name'] = classifier_name
        else:
            self._container.classifiers['name'] = 'NBC'
            self.__logger.error('Got unknown classifier, set default (NBC).',
                                __name__)

        self._container.weights['unigrams'] = unigrams_weight
        self._container.weights['bigrams'] = bigrams_weight
        self._container.weights['trigrams'] = trigrams_weight

        try:
            for model_type in self._MODEL_TYPES:
                if self._container.weights[model_type]:
                    # NOTE(review): joblib.load unpickles the file, so model
                    # files must come from a trusted location.
                    self._container.classifiers[model_type] = joblib.load(
                        self._path_service.get_path_to_model(
                            model_type, self._container.classifiers['name']))

            self.__logger.info('Models were successfully loaded.', __name__)
            self.__logger.info('Classifier was successfully configured.',
                               __name__)

        except Exception as exception:
            # Exception, not BaseException: KeyboardInterrupt and SystemExit
            # must still be able to stop the program.
            self.__logger.fatal(
                self._exceptions_handler.get_error_message(exception),
                __name__)

    def _predict_by_model(self, model_type, weight_names):
        """Run one model on a single sample built from the named weights.

        The sample is always passed as a 2-D ``[[...]]`` list (one row, one
        column per weight). The predicted tonal and the highest class
        probability are stored in the container under ``model_type``.
        """
        features = [[self._container.weights[name] for name in weight_names]]
        model = self._container.classifiers[model_type]

        self._container.tonalities[model_type] = model.predict(features)[0]
        self._container.probabilities[model_type] = max(
            model.predict_proba(features)[0])

        label = model_type.capitalize()
        self.__logger.info(
            f'{label} tonal: {self._container.tonalities[model_type]}',
            __name__)
        self.__logger.info(
            f'{label} probability: {self._container.probabilities[model_type]}',
            __name__)

    def _predict_tonal_by_unigrams(self):
        """Predict with the unigram model from the unigram weight."""
        self._predict_by_model('unigrams', self._MODEL_TYPES[:1])

    def _predict_tonal_by_unigrams_bigrams(self):
        """Predict with the bigram model from the unigram and bigram weights."""
        self._predict_by_model('bigrams', self._MODEL_TYPES[:2])

    def _predict_tonal_by_unigrams_bigrams_trigrams(self):
        """Predict with the trigram model from all three weights."""
        self._predict_by_model('trigrams', self._MODEL_TYPES)

    def _predict_intermediate_tonalities(self):
        """Run every model that has a truthy weight, each in its own thread."""
        predictors = (
            ('unigrams', self._predict_tonal_by_unigrams),
            ('bigrams', self._predict_tonal_by_unigrams_bigrams),
            ('trigrams', self._predict_tonal_by_unigrams_bigrams_trigrams),
        )

        threads = [
            Thread(target=predictor, args=())
            for model_type, predictor in predictors
            if self._container.weights[model_type]
        ]

        for thread in threads:
            thread.start()

        # join() blocks until the thread finishes; polling is_alive() first
        # only adds latency.
        for thread in threads:
            thread.join()

    def _select_final_tonal(self):
        """Combine the per-model answers into the final tonal and probability.

        - All three tonalities present: the first agreeing pair (checked as
          unigrams/bigrams, unigrams/trigrams, bigrams/trigrams) gives the
          tonal together with the higher of the pair's probabilities. If no
          two models agree, the most probable single answer wins.
        - Unigrams and bigrams present: the more probable answer wins
          (unigrams on a tie).
        - Unigrams present without bigrams: the unigram answer is used.
        - Otherwise 'final' is left untouched.
        """
        tonalities = self._container.tonalities
        probabilities = self._container.probabilities

        if tonalities['unigrams'] and tonalities['bigrams'] and \
                tonalities['trigrams']:
            agreeing_pairs = (('unigrams', 'bigrams'),
                              ('unigrams', 'trigrams'),
                              ('bigrams', 'trigrams'))

            for first, second in agreeing_pairs:
                if tonalities[first] == tonalities[second]:
                    tonalities['final'] = tonalities[first]
                    probabilities['final'] = max(probabilities[first],
                                                 probabilities[second])
                    return

            # Only reachable with more than two classes: all three disagree.
            winner = max(self._MODEL_TYPES,
                         key=lambda model_type: probabilities[model_type])

        elif tonalities['unigrams'] and tonalities['bigrams']:
            # When both agree, either choice gives the same tonal and the
            # higher probability, so one comparison covers both cases.
            if probabilities['unigrams'] >= probabilities['bigrams']:
                winner = 'unigrams'
            else:
                winner = 'bigrams'

        elif tonalities['unigrams']:
            winner = 'unigrams'

        else:
            return

        tonalities['final'] = tonalities[winner]
        probabilities['final'] = probabilities[winner]

    def predict_tonal(self):
        """Run the models and return ``(final tonal, final probability)``."""
        self._predict_intermediate_tonalities()
        self._select_final_tonal()

        self.__logger.info(
            f'Final tonal: {self._container.tonalities["final"]}', __name__)
        self.__logger.info(
            f'Final probability: {self._container.probabilities["final"]}',
            __name__)

        return self._container.tonalities[
            'final'], self._container.probabilities['final']