def enchant_sanity_test(language: str = '', word: str = '') -> bool:
    '''Check whether python3-enchant returns any suggestions for a word.

    :param language: The language of the dictionary to try
    :param word: The word to give to enchant to ask for suggestions
    :return: True if enchant produced at least one suggestion, else False

    This is used as a sanity check whether python3-enchant works at all.
    For example, if a Czech dictionary is opened like
    ``d = enchant.Dict('cs_CZ')`` and then something like
    ``retval = d.suggest('Praha')`` returns an empty list instead of a
    list of some words, then something is seriously wrong with
    python3-enchant and it is better to skip the test cases which rely
    on python3-enchant working for that language.
    '''
    # Both a language and a word are required for a meaningful check:
    if not language or not word:
        return False
    # Without a hunspell dictionary for this language the check is moot:
    if not itb_util.get_hunspell_dictionary_wordlist(language)[0]:
        return False
    return bool(enchant.Dict(language).suggest(word))
def test_korean(self):
    '''Type Korean romaja and check the hangul candidates offered.'''
    if not itb_util.get_hunspell_dictionary_wordlist('ko_KR')[0]:
        # No Korean dictionary file could be found, skip this test.
        # On some systems, like 'Arch' or 'FreeBSD', there is no
        # ko_KR.dic hunspell dictionary available, therefore there is
        # no way to run this test on these systems.  On systems where
        # a Korean hunspell dictionary is available, make sure it is
        # installed to make this test case run.  In the
        # ibus-typing-booster.spec file for Fedora, there is a
        # "BuildRequires: hunspell-ko" for that purpose to make sure
        # this test runs when building the rpm package.
        return
    self.engine.set_current_imes(['ko-romaja'])
    self.engine.set_dictionary_names(['ko_KR'])

    def type_romaja(key_names):
        # Feed each named key to the engine as a single key press:
        for key_name in key_names:
            self.engine.do_process_key_event(
                getattr(IBus, 'KEY_' + key_name), 0, 0)

    def nfc_candidates():
        # Candidates normalized to NFC for comparison:
        return [unicodedata.normalize('NFC', candidate[0])
                for candidate in self.engine._candidates]

    type_romaja('annyeonghasey')
    self.assertEqual(self.engine.mock_preedit_text, 'ьХИыЕХэХШьД╕ьЭ┤')
    candidates = nfc_candidates()
    self.assertEqual(True, 'ьХИыЕХэХШьД╕ьЪФ' in candidates)
    type_romaja('o')
    self.assertEqual(self.engine.mock_preedit_text, 'ьХИыЕХэХШьД╕ьЪФ')
    self.engine.do_process_key_event(IBus.KEY_space, 0, 0)
    self.assertEqual(self.engine.mock_preedit_text, '')
    self.assertEqual(self.engine.mock_committed_text, 'ьХИыЕХэХШьД╕ьЪФ ')
    type_romaja('annyeonghasey')
    self.assertEqual(self.engine.mock_preedit_text, 'ьХИыЕХэХШьД╕ьЭ┤')
    candidates = nfc_candidates()
    self.assertEqual(True, 'ьХИыЕХэХШьД╕ьЪФ' in candidates)
    # After the earlier commit, the committed form should now be the
    # first candidate:
    self.assertEqual('ьХИыЕХэХШьД╕ьЪФ', candidates[0])
def test_korean(self):
    '''Type Korean romaja and check the hangul candidates (variant).'''
    if not itb_util.get_hunspell_dictionary_wordlist('ko_KR')[0]:
        # No Korean dictionary file could be found, skip this test.
        # On some systems, like 'Arch' or 'FreeBSD', there is no
        # ko_KR.dic hunspell dictionary available, therefore there is
        # no way to run this test on these systems.  On systems where
        # a Korean hunspell dictionary is available, make sure it is
        # installed to make this test case run.  In the
        # ibus-typing-booster.spec file for Fedora, there is a
        # "BuildRequires: hunspell-ko" for that purpose to make sure
        # this test runs when building the rpm package.
        return
    self.engine.set_current_imes(['ko-romaja'])
    self.engine.set_dictionary_names(['ko_KR'])
    # The romaja key sequence for "annyeonghasey":
    romaja_keys = [
        IBus.KEY_a, IBus.KEY_n, IBus.KEY_n, IBus.KEY_y, IBus.KEY_e,
        IBus.KEY_o, IBus.KEY_n, IBus.KEY_g, IBus.KEY_h, IBus.KEY_a,
        IBus.KEY_s, IBus.KEY_e, IBus.KEY_y,
    ]
    for keyval in romaja_keys:
        self.engine.do_process_key_event(keyval, 0, 0)
    self.assertEqual(self.engine.mock_preedit_text, 'ьХИыЕХэХШьД╕ьЭ┤')
    candidates = []
    for candidate in self.engine._candidates:
        candidates.append(unicodedata.normalize('NFC', candidate[0]))
    self.assertEqual(True, 'ьХИыЕХэХШьД╕ьЪФ' in candidates)
    self.engine.do_process_key_event(IBus.KEY_o, 0, 0)
    self.assertEqual(self.engine.mock_preedit_text, 'ьХИыЕХэХШьД╕ьЪФ')
    self.engine.do_process_key_event(IBus.KEY_space, 0, 0)
    self.assertEqual(self.engine.mock_preedit_text, '')
    self.assertEqual(self.engine.mock_committed_text, 'ьХИыЕХэХШьД╕ьЪФ ')
    # Type the same sequence again; the previously committed form
    # should now come out on top:
    for keyval in romaja_keys:
        self.engine.do_process_key_event(keyval, 0, 0)
    self.assertEqual(self.engine.mock_preedit_text, 'ьХИыЕХэХШьД╕ьЭ┤')
    candidates = []
    for candidate in self.engine._candidates:
        candidates.append(unicodedata.normalize('NFC', candidate[0]))
    self.assertEqual(True, 'ьХИыЕХэХШьД╕ьЪФ' in candidates)
    self.assertEqual('ьХИыЕХэХШьД╕ьЪФ', candidates[0])
def load_dictionary(self):
    '''Load a hunspell dictionary and instantiate a
    enchant.Dict() or a hunspell.Hunspell() object.
    '''
    if DEBUG_LEVEL > 0:
        sys.stderr.write("load_dictionary() ...\n")
    (self.dic_path,
     self.encoding,
     self.words) = itb_util.get_hunspell_dictionary_wordlist(self.name)
    if not self.words:
        # Nothing was loaded, nothing more to set up:
        return
    # List of languages where accent insensitive matching makes sense:
    accent_languages = (
        'af', 'ast', 'az', 'be', 'bg', 'br', 'bs', 'ca', 'cs', 'csb',
        'cv', 'cy', 'da', 'de', 'dsb', 'el', 'en', 'es', 'eu', 'fo',
        'fr', 'fur', 'fy', 'ga', 'gd', 'gl', 'grc', 'gv', 'haw', 'hr',
        'hsb', 'ht', 'hu', 'ia', 'is', 'it', 'kk', 'ku', 'ky', 'lb',
        'ln', 'lv', 'mg', 'mi', 'mk', 'mn', 'mos', 'mt', 'nb', 'nds',
        'nl', 'nn', 'nr', 'nso', 'ny', 'oc', 'pl', 'plt', 'pt', 'qu',
        'quh', 'ru', 'sc', 'se', 'sh', 'shs', 'sk', 'sl', 'smj', 'sq',
        'sr', 'ss', 'st', 'sv', 'tet', 'tk', 'tn', 'ts', 'uk', 'uz',
        've', 'vi', 'wa', 'xh',
    )
    if self.name.split('_')[0] in accent_languages:
        self.word_pairs = [
            (word, itb_util.remove_accents(word)) for word in self.words]
    # Track the length of the longest word in the dictionary:
    self.max_word_len = max(
        self.max_word_len, max(len(word) for word in self.words))
    if DEBUG_LEVEL > 1:
        sys.stderr.write(
            'load_dictionary() max_word_len = %s\n' % self.max_word_len)
    if IMPORT_ENCHANT_SUCCESSFUL:
        self.enchant_dict = enchant.Dict(self.name)
    elif IMPORT_HUNSPELL_SUCCESSFUL and self.dic_path:
        # pyhunspell needs the matching .aff file next to the .dic file:
        aff_path = self.dic_path.replace('.dic', '.aff')
        self.pyhunspell_object = hunspell.HunSpell(self.dic_path, aff_path)
def load_dictionary(self):
    '''Load a hunspell dictionary and instantiate a
    enchant.Dict() or a hunspell.Hunspell() object.
    '''
    if DEBUG_LEVEL > 0:
        sys.stderr.write("load_dictionary() ...\n")
    (self.dic_path, self.encoding, self.words) = (
        itb_util.get_hunspell_dictionary_wordlist(self.name))
    if self.words:
        # List of languages where accent insensitive matching makes sense:
        accent_languages = (
            'af', 'ast', 'az', 'be', 'bg', 'br', 'bs', 'ca', 'cs', 'csb',
            'cv', 'cy', 'da', 'de', 'dsb', 'el', 'en', 'es', 'eu', 'fo',
            'fr', 'fur', 'fy', 'ga', 'gd', 'gl', 'grc', 'gv', 'haw', 'hr',
            'hsb', 'ht', 'hu', 'ia', 'is', 'it', 'kk', 'ku', 'ky', 'lb',
            'ln', 'lv', 'mg', 'mi', 'mk', 'mn', 'mos', 'mt', 'nb', 'nds',
            'nl', 'nn', 'nr', 'nso', 'ny', 'oc', 'pl', 'plt', 'pt', 'qu',
            'quh', 'ru', 'sc', 'se', 'sh', 'shs', 'sk', 'sl', 'smj', 'sq',
            'sr', 'ss', 'st', 'sv', 'tet', 'tk', 'tn', 'ts', 'uk', 'uz',
            've', 'vi', 'wa', 'xh',
        )
        language = self.name.split('_')[0]
        if language in accent_languages:
            self.word_pairs = [(word, itb_util.remove_accents(word))
                               for word in self.words]
        # Remember the length of the longest word in the dictionary:
        longest_word = max(self.words, key=len)
        if len(longest_word) > self.max_word_len:
            self.max_word_len = len(longest_word)
        if DEBUG_LEVEL > 1:
            sys.stderr.write(
                'load_dictionary() max_word_len = %s\n'
                % self.max_word_len)
        if IMPORT_ENCHANT_SUCCESSFUL:
            self.enchant_dict = enchant.Dict(self.name)
        elif IMPORT_HUNSPELL_SUCCESSFUL and self.dic_path:
            # pyhunspell needs the matching .aff file for the .dic file:
            aff_path = self.dic_path.replace('.dic', '.aff')
            self.pyhunspell_object = hunspell.HunSpell(
                self.dic_path, aff_path)
def load_dictionary(self):
    '''Load a hunspell dictionary and instantiate a
    enchant.Dict() or a hunspell.Hunspell() object.

    For Finnish ('fi'), neither enchant nor pyhunspell is used; a
    libvoikko.Voikko('fi') object is created instead if libvoikko
    could be imported.
    '''
    if DEBUG_LEVEL > 0:
        LOGGER.debug('load_dictionary() ...\n')
    (self.dic_path,
     self.encoding,
     self.words) = itb_util.get_hunspell_dictionary_wordlist(self.name)
    if self.words:
        # List of languages where accent insensitive matching makes sense:
        accent_languages = (
            'af', 'ast', 'az', 'be', 'bg', 'br', 'bs', 'ca', 'cs', 'csb',
            'cv', 'cy', 'da', 'de', 'dsb', 'el', 'en', 'es', 'eu', 'fi',
            'fo', 'fr', 'fur', 'fy', 'ga', 'gd', 'gl', 'grc', 'gv', 'haw',
            'hr', 'hsb', 'ht', 'hu', 'ia', 'is', 'it', 'kk', 'ku', 'ky',
            'lb', 'ln', 'lv', 'mg', 'mi', 'mk', 'mn', 'mos', 'mt', 'nb',
            'nds', 'nl', 'nn', 'nr', 'nso', 'ny', 'oc', 'pl', 'plt', 'pt',
            'qu', 'quh', 'ru', 'sc', 'se', 'sh', 'shs', 'sk', 'sl', 'smj',
            'sq', 'sr', 'ss', 'st', 'sv', 'tet', 'tk', 'tn', 'ts', 'uk',
            'uz', 've', 'vi', 'wa', 'xh',
        )
        if self.name.split('_')[0] in accent_languages:
            self.word_pairs = [(x, itb_util.remove_accents(x))
                               for x in self.words]
        # Remember the length of the longest word in the dictionary:
        for word in self.words:
            if len(word) > self.max_word_len:
                self.max_word_len = len(word)
        if DEBUG_LEVEL > 1:
            LOGGER.debug('max_word_len = %s\n', self.max_word_len)
        if self.name.split('_')[0] == 'fi':
            # Finnish uses libvoikko, not enchant/pyhunspell:
            self.enchant_dict = None
            self.pyhunspell_object = None
            if IMPORT_LIBVOIKKO_SUCCESSFUL:
                self.voikko = libvoikko.Voikko('fi')
            return
        if IMPORT_ENCHANT_SUCCESSFUL:
            try:
                self.enchant_dict = enchant.Dict(self.name)
            except enchant.errors.DictNotFoundError:
                LOGGER.exception(
                    'Error initializing enchant for %s', self.name)
                self.enchant_dict = None
            except Exception:
                LOGGER.exception(
                    'Unknown error initializing enchant for %s', self.name)
                self.enchant_dict = None
        elif IMPORT_HUNSPELL_SUCCESSFUL and self.dic_path:
            # pyhunspell needs the matching .aff file for the .dic file:
            aff_path = self.dic_path.replace('.dic', '.aff')
            try:
                self.pyhunspell_object = hunspell.HunSpell(
                    self.dic_path, aff_path)
            except hunspell.HunSpellError:
                # Use LOGGER.exception (not LOGGER.debug) so the
                # traceback is recorded, consistent with the enchant
                # branch above:
                LOGGER.exception(
                    'Error initializing hunspell for %s', self.name)
                self.pyhunspell_object = None
            except Exception:
                LOGGER.exception(
                    'Unknown error initializing hunspell for %s', self.name)
                self.pyhunspell_object = None
class HunspellSuggestTestCase(unittest.TestCase):
    '''Test cases for word suggestions from hunspell dictionaries.'''

    def setUp(self):
        # Always show the full diff when an assertion fails:
        self.maxDiff = None

    def tearDown(self):
        pass

    def test_dummy(self):
        self.assertEqual(True, True)

    @unittest.skipUnless(
        IMPORT_ENCHANT_SUCCESSFUL,
        "Skipping because this test requires python3-enchant to work.")
    def test_de_DE_cs_CZ_enchant(self):
        suggester = hunspell_suggest.Hunspell(['de_DE', 'cs_CZ'])
        self.assertEqual(
            suggester.suggest('Geschwindigkeitsubertre')[0],
            ('Geschwindigkeitsu\u0308bertretungsverfahren', 0))
        self.assertEqual(
            suggester.suggest('Geschwindigkeitsübertretungsverfahren')[0],
            ('Geschwindigkeitsu\u0308bertretungsverfahren', 0))
        self.assertEqual(
            suggester.suggest('Glühwürmchen')[0],
            ('Glu\u0308hwu\u0308rmchen', 0))
        self.assertEqual(
            suggester.suggest('Alpengluhen')[0],
            ('Alpenglu\u0308hen', 0))
        self.assertEqual(
            suggester.suggest('filosofictejsi'),
            [('filosofic\u030Cte\u030Cjs\u030Ci\u0301', 0),
             ('filosofic\u030Cte\u030Cji', -1)])
        self.assertEqual(
            suggester.suggest('filosofictejs')[0],
            ('filosofic\u030Cte\u030Cjs\u030Ci\u0301', 0))
        self.assertEqual(
            suggester.suggest('filosofičtější')[0],
            ('filosofic\u030Cte\u030Cjs\u030Ci\u0301', 0))
        self.assertEqual(
            suggester.suggest('filosofičtějš')[0],
            ('filosofic\u030Cte\u030Cjs\u030Ci\u0301', 0))

    @unittest.skipUnless(
        IMPORT_HUNSPELL_SUCCESSFUL and not IMPORT_ENCHANT_SUCCESSFUL,
        "Skipping because this test requires python3-pyhunspell to work.")
    def test_de_DE_cs_CZ_pyhunspell(self):
        suggester = hunspell_suggest.Hunspell(['de_DE', 'cs_CZ'])
        self.assertEqual(
            suggester.suggest('Geschwindigkeitsubertre')[0],
            ('Geschwindigkeitsu\u0308bertretungsverfahren', 0))
        self.assertEqual(
            suggester.suggest('Geschwindigkeitsübertretungsverfahren')[0],
            ('Geschwindigkeitsu\u0308bertretungsverfahren', 0))
        self.assertEqual(
            suggester.suggest('Glühwürmchen')[0],
            ('Glu\u0308hwu\u0308rmchen', 0))
        self.assertEqual(
            suggester.suggest('Alpengluhen')[0],
            ('Alpenglu\u0308hen', 0))
        self.assertEqual(
            suggester.suggest('filosofictejsi'),
            [('filosofic\u030Cte\u030Cjs\u030Ci\u0301', 0),
             ('filosofie\u0300ti\u0300ji', -1)])
        self.assertEqual(
            suggester.suggest('filosofictejs')[0],
            ('filosofic\u030Cte\u030Cjs\u030Ci\u0301', 0))
        self.assertEqual(
            suggester.suggest('filosofičtější')[0],
            ('filosofic\u030Cte\u030Cjs\u030Ci\u0301', 0))
        self.assertEqual(
            suggester.suggest('filosofičtějš')[0],
            ('filosofic\u030Cte\u030Cjs\u030Ci\u0301', 0))

    def test_it_IT(self):
        suggester = hunspell_suggest.Hunspell(['it_IT'])
        self.assertEqual(
            suggester.suggest('principianti'),
            [('principianti', 0),
             ('principiati', -1),
             ('principiante', -1),
             ('principiarti', -1),
             ('principiasti', -1)])

    def test_es_ES(self):
        suggester = hunspell_suggest.Hunspell(['es_ES'])
        self.assertEqual(
            suggester.suggest('teneis'),
            [('tene\u0301is', 0),
             ('tenes', -1),
             ('tenis', -1),
             ('teneos', -1),
             ('tienes', -1),
             ('te neis', -1),
             ('te-neis', -1)])
        self.assertEqual(
            suggester.suggest('tenéis')[0], ('tene\u0301is', 0))

    def test_en_US(self):
        suggester = hunspell_suggest.Hunspell(['en_US'])
        self.assertEqual(
            suggester.suggest('camel'),
            [('camel', 0),
             ('camellia', 0),
             ('camelhair', 0),
             ('came', -1),
             ('Camel', -1),
             ('cameo', -1),
             ('came l', -1),
             ('camels', -1)])

    def test_fr_FR(self):
        suggester = hunspell_suggest.Hunspell(['fr_FR'])
        self.assertEqual(
            suggester.suggest('differemmen'),
            [('diffe\u0301remment', 0)])

    @unittest.skipUnless(
        itb_util.get_hunspell_dictionary_wordlist('el_GR')[0],
        "Skipping because no Greek dictionary could be found. ")
    def test_el_GR(self):
        suggester = hunspell_suggest.Hunspell(['el_GR'])
        self.assertEqual(
            suggester.suggest('αλφαβητο')[0], ('αλφάβητο', 0))

    def test_fi_FI_dictionary_file(self):
        # The fi_FI dictionary file is included in ibus-typing-booster:
        suggester = hunspell_suggest.Hunspell(['fi_FI'])
        self.assertEqual(
            suggester.suggest('kissa'),
            [('kissa', 0),
             ('kissaa', 0),
             ('kissani', 0),
             ('kissassa', 0),
             ('kissajuttu', 0),
             ('kissamaiseksi', 0)])
        self.assertEqual(
            suggester.suggest('Pariisin-suurlahettila'),
            [('Pariisin-suurla\u0308hettila\u0308s', 0)])

    @unittest.skipUnless(
        IMPORT_LIBVOIKKO_SUCCESSFUL,
        "Skipping because this test requires python3-libvoikko to work.")
    def test_fi_FI_voikko(self):
        dic = hunspell_suggest.Dictionary('fi_FI')
        self.assertEqual(dic.has_spellchecking(), True)
        suggester = hunspell_suggest.Hunspell(['fi_FI'])
        self.assertEqual(
            suggester.suggest('kisssa'),
            [('kissa', -1),
             ('kissaa', -1),
             ('kisassa', -1),
             ('kisussa', -1)])

    @unittest.skipUnless(
        IMPORT_ENCHANT_SUCCESSFUL,
        "Skipping because this test requires python3-enchant to work.")
    def test_en_US_spellcheck_enchant(self):
        dic = hunspell_suggest.Dictionary('en_US')
        self.assertEqual(dic.spellcheck_enchant('winter'), True)
        self.assertEqual(dic.spellcheck_enchant('winxer'), False)

    @unittest.skipUnless(
        IMPORT_ENCHANT_SUCCESSFUL,
        "Skipping because this test requires python3-enchant to work.")
    def test_en_US_spellcheck_suggest_enchant(self):
        dic = hunspell_suggest.Dictionary('en_US')
        self.assertEqual(
            dic.spellcheck_suggest_enchant('kamel'),
            ['camel', 'Camel'])

    @unittest.skipUnless(
        IMPORT_HUNSPELL_SUCCESSFUL and not IMPORT_ENCHANT_SUCCESSFUL,
        "Skipping because this test requires python3-pyhunspell to work.")
    def test_en_US_spellcheck_pyhunspell(self):
        dic = hunspell_suggest.Dictionary('en_US')
        self.assertEqual(dic.spellcheck_pyhunspell('winter'), True)
        self.assertEqual(dic.spellcheck_pyhunspell('winxer'), False)

    @unittest.skipUnless(
        IMPORT_HUNSPELL_SUCCESSFUL and not IMPORT_ENCHANT_SUCCESSFUL,
        "Skipping because this test requires python3-pyhunspell to work.")
    def test_en_US_spellcheck_suggest_pyhunspell(self):
        dic = hunspell_suggest.Dictionary('en_US')
        self.assertEqual(
            dic.spellcheck_suggest_pyhunspell('kamel'),
            ['camel', 'Camel'])

    @unittest.skipUnless(
        IMPORT_LIBVOIKKO_SUCCESSFUL,
        "Skipping because this test requires python3-libvoikko to work.")
    def test_fi_FI_spellcheck_voikko(self):
        dic = hunspell_suggest.Dictionary('fi_FI')
        self.assertEqual(dic.spellcheck_voikko('kissa'), True)
        self.assertEqual(dic.spellcheck_voikko('kisssa'), False)
        self.assertEqual(dic.spellcheck_voikko('Päiviä'), True)
        self.assertEqual(dic.spellcheck_voikko('Päivia'), False)

    @unittest.skipUnless(
        IMPORT_LIBVOIKKO_SUCCESSFUL,
        "Skipping because this test requires python3-libvoikko to work.")
    def test_fi_FI_spellcheck_suggest_voikko(self):
        dic = hunspell_suggest.Dictionary('fi_FI')
        self.assertEqual(
            dic.spellcheck_suggest_voikko('kisssa'),
            ['kissa', 'kissaa', 'kisassa', 'kisussa'])
class TabSqliteDbTestCase(unittest.TestCase):
    '''Tests for the user database (tabsqlitedb) combined with
    hunspell dictionaries: typing simulation and training.
    '''

    def setUp(self) -> None:
        pass

    def tearDown(self) -> None:
        pass

    def init_database(
            self,
            user_db_file: str = ':memory:',
            dictionary_names: Iterable[str] = ('en_US',)) -> None:
        '''Create a fresh TabSqliteDb and set its dictionaries.

        :param user_db_file: Path of the user database file;
                             ':memory:' creates a throwaway in-memory db.
        :param dictionary_names: Hunspell dictionaries to use.
        '''
        self.database = tabsqlitedb.TabSqliteDb(user_db_file=user_db_file)
        self.database.hunspell_obj.set_dictionary_names(
            list(dictionary_names))

    def read_training_data_from_file(self, filename: str) -> bool:
        '''Feed a training text file into the database.

        :param filename: Name of the training file; looked up next to
                         this test module, falling back to a '.gz'
                         variant if the plain file does not exist.
        :return: True if the file was found and read, False otherwise.
        '''
        # NOTE(review): assumes `filename` is a bare file name; if it
        # contains '/', `path` is never bound here — confirm against
        # the callers (they all pass bare names).
        if '/' not in filename:
            path = os.path.join(os.path.dirname(__file__), filename)
        path = os.path.expanduser(path)
        if not os.path.isfile(path):
            # Fall back to a gzip-compressed variant of the file:
            path += '.gz'
            if not os.path.isfile(path):
                return False
        return self.database.read_training_data_from_file(path)

    def simulate_typing_file(
            self,
            path: str,
            verbose: bool = True) -> Dict[str, Union[int, float]]:
        '''Simulate typing the text of a file using the database.

        For every token, types it character by character and checks
        after each character whether the first candidate from the
        database equals the token; the number of characters needed is
        accumulated into the statistics.

        :param path: Training text file; looked up next to this test
                     module, falling back to a '.gz' variant.
        :param verbose: If True, log per-token statistics.
        :return: dict with keys 'typed', 'committed', 'saved',
                 'percent' (percent is negative when typing was saved).
        '''
        stats: Dict[str, Union[int, float]] = {
            'typed': 0, 'committed': 0, 'saved': 0, 'percent': 0.0}
        if '/' not in path:
            path = os.path.join(os.path.dirname(__file__), path)
        path = os.path.expanduser(path)
        if not os.path.isfile(path):
            # Fall back to a gzip-compressed variant of the file:
            path += '.gz'
            if not os.path.isfile(path):
                # The file must exist; fail the test if it does not:
                self.assertFalse(True)
                return stats
        open_function: Callable[[Any], Any] = open
        if path.endswith('.gz'):
            open_function = gzip.open
        with open_function( # type: ignore
                path, mode='rt', encoding='UTF-8') as file_handle:
            lines = file_handle.readlines()
        # NOTE(review): p_token/pp_token are never updated below, so
        # select_words() is always called with empty context — confirm
        # whether that is intended.
        p_token = ''
        pp_token = ''
        total_length_typed = 0
        total_length_committed = 0
        total_length_saved = 0
        total_percent_saved = 0.0
        current_line = 0
        total_lines = len(lines)
        for line in lines:
            current_line += 1
            for token in itb_util.tokenize(line):
                length_typed = 0
                length_saved = 0
                percent_saved = 0.0
                # Type the token one character at a time until the
                # database predicts it as the first candidate:
                for i in range(1, len(token)):
                    candidates = self.database.select_words(
                        token[:i], p_phrase=p_token, pp_phrase=pp_token)
                    if candidates and candidates[0][0] == token:
                        length_typed = i
                        break
                    if i == len(token) - 1:
                        # Never predicted; the whole token was typed:
                        length_typed = len(token)
                length_saved = length_typed - len(token)
                percent_saved = 100.0 * length_saved / len(token)
                total_length_typed += length_typed
                total_length_committed += len(token)
                total_length_saved += length_saved
                total_percent_saved = (
                    100.0 * total_length_saved / total_length_committed)
                if verbose:
                    LOGGER.info(
                        'line %s/%s: %s -> %s %s %2.1f%% '
                        'total: %s -> %s %s %2.1f%%',
                        current_line, total_lines,
                        token[:length_typed], token,
                        length_saved, percent_saved,
                        total_length_typed, total_length_committed,
                        total_length_saved, total_percent_saved)
        stats['typed'] = total_length_typed
        stats['committed'] = total_length_committed
        stats['saved'] = total_length_saved
        stats['percent'] = total_percent_saved
        return stats

    def test_dummy(self) -> None:
        self.assertEqual(True, True)

    @unittest.skipUnless(
        itb_util.get_hunspell_dictionary_wordlist('en_US')[0],
        'Skipping because no en_US hunspell dictionary could be found.')
    def test_empty_database_only_dictionary(self) -> None:
        self.init_database(
            user_db_file=':memory:', dictionary_names=['en_US'])
        self.assertEqual(
            'Baltimore',
            self.database.select_words(
                'baltim', p_phrase='foo', pp_phrase='bar')[0][0])

    @unittest.skipUnless(
        itb_util.get_hunspell_dictionary_wordlist('en_US')[0],
        'Skipping because no en_US hunspell dictionary could be found.')
    @unittest.skipUnless(
        itb_util.get_hunspell_dictionary_wordlist('fr_FR')[0],
        'Skipping because no fr_FR hunspell dictionary could be found.')
    @unittest.skipUnless(
        IMPORT_DISTRO_SUCCESSFUL and distro.id() == 'fedora',
        'Skipping on other distros then Fedora, '
        'French dictionary might be too different on other distributions.')
    def test_english_poem(self) -> None:
        training_file = 'the_road_not_taken.txt'
        self.init_database(
            user_db_file=':memory:',dictionary_names=['fr_FR'])
        stats = self.simulate_typing_file(training_file, verbose=False)
        LOGGER.info('stats=%s', repr(stats))
        # -2.5% saved when typing the English poem with the fr_FR
        # dictionary:
        self.assertEqual(-2.5, round(stats['percent'], 1))
        # Set the en_US dictionary and see whether the result is better:
        self.database.hunspell_obj.set_dictionary_names(['en_US'])
        stats = self.simulate_typing_file(training_file, verbose=False)
        LOGGER.info('stats=%s', repr(stats))
        # -9.3% saved when typing the English poem with the en_US
        # dictionary:
        self.assertEqual(-9.3, round(stats['percent'], 1))
        self.assertEqual(
            'undergrad',
            self.database.select_words(
                'undergr', p_phrase='in', pp_phrase='the')[0][0])
        self.assertEqual(0, self.database.number_of_rows_in_database())
        self.assertEqual(
            True, self.read_training_data_from_file(training_file))
        # Now the database should have rows:
        self.assertEqual(148, self.database.number_of_rows_in_database())
        # Now that the training data has been read into the database
        # the result should change:
        self.assertEqual(
            'undergrowth',
            self.database.select_words(
                'undergr', p_phrase='in', pp_phrase='the')[0][0])
        stats = self.simulate_typing_file(training_file, verbose=False)
        LOGGER.info('stats=%s', repr(stats))
        # -51.3% saved when typing the English poem with the trained
        # database and the en_US dictionary:
        self.assertEqual(-51.3, round(stats['percent'], 1))
        # Set the fr_FR dictionary and see whether that makes the
        # result worse:
        self.database.hunspell_obj.set_dictionary_names(['fr_FR'])
        stats = self.simulate_typing_file(training_file, verbose=False)
        LOGGER.info('stats=%s', repr(stats))
        # -51.3% saved when typing the English poem with the trained
        # database and the fr_FR dictionary.  When the database is
        # trained so well, the dictionary almost doesn’t matter anymore:
        self.assertEqual(-51.3, round(stats['percent'], 1))

    @unittest.skipUnless(
        itb_util.get_hunspell_dictionary_wordlist('en_US')[0],
        'Skipping because no en_US hunspell dictionary could be found.')
    @unittest.skipUnless(
        itb_util.get_hunspell_dictionary_wordlist('fr_FR')[0],
        'Skipping because no fr_FR hunspell dictionary could be found.')
    @unittest.skipUnless(
        IMPORT_DISTRO_SUCCESSFUL and distro.id() == 'fedora',
        'Skipping on other distros then Fedora, '
        'French dictionary might be too different on other distributions.')
    def test_french_poem(self) -> None:
        training_file = 'chant_d_automne.txt'
        self.init_database(
            user_db_file=':memory:',dictionary_names=['en_US'])
        stats = self.simulate_typing_file(training_file, verbose=False)
        LOGGER.info('stats=%s', repr(stats))
        # -2.3% saved when typing the French poem with the en_US
        # dictionary:
        self.assertEqual(-2.3, round(stats['percent'], 1))
        # Set the fr_FR dictionary and see whether the result is better:
        self.database.hunspell_obj.set_dictionary_names(['fr_FR'])
        stats = self.simulate_typing_file(training_file, verbose=False)
        LOGGER.info('stats=%s', repr(stats))
        # -7.3% saved on Fedora 35 when typing the French poem with
        # the fr_FR dictionary.  On openSUSE Tumbleweed (2021-11-23)
        # it is -8.2%.
        self.assertEqual(-7.3, round(stats['percent'], 1))
        self.assertEqual(
            'plonge',
            self.database.select_words(
                'plong', p_phrase='nous', pp_phrase='Bientôt')[0][0])
        self.assertEqual(0, self.database.number_of_rows_in_database())
        self.assertEqual(
            True, self.read_training_data_from_file(training_file))
        # Now the database should have rows:
        self.assertEqual(224, self.database.number_of_rows_in_database())
        # Now that the training data has been read into the database
        # the result should change:
        self.assertEqual(
            'plongerons',
            self.database.select_words(
                'plong', p_phrase='nous', pp_phrase='Bientôt')[0][0])
        stats = self.simulate_typing_file(training_file, verbose=False)
        LOGGER.info('stats=%s', repr(stats))
        # -37.6% saved when typing the French poem with the trained
        # database and the fr_FR dictionary:
        self.assertEqual(-37.6, round(stats['percent'], 1))
        # Set the fr_FR dictionary and see whether that makes the
        # result worse:
        self.database.hunspell_obj.set_dictionary_names(['en_US'])
        stats = self.simulate_typing_file(training_file, verbose=False)
        LOGGER.info('stats=%s', repr(stats))
        # -37.6% saved when typing the French poem with the trained
        # database and the en_US dictionary.  When the database is
        # trained so well, the dictionary almost doesn’t matter anymore:
        self.assertEqual(-37.6, round(stats['percent'], 1))

    @unittest.skipUnless(
        itb_util.get_hunspell_dictionary_wordlist('fr_FR')[0],
        'Skipping because no fr_FR hunspell dictionary could be found.')
    @unittest.skipUnless(
        IMPORT_DISTRO_SUCCESSFUL and distro.id() == 'fedora',
        'Skipping on other distros then Fedora, '
        'French dictionary might be too different on other distributions.')
    def test_french_book(self) -> None:
        training_file = 'victor_hugo_notre_dame_de_paris.txt'
        self.init_database(
            user_db_file=':memory:',dictionary_names=['fr_FR'])
        self.assertEqual(0, self.database.number_of_rows_in_database())
        if not self.read_training_data_from_file(training_file):
            self.skipTest('Training file %s not available' % training_file)
        # Now the database should have rows:
        self.assertEqual(156245, self.database.number_of_rows_in_database())
        self.database.cleanup_database(thread=False)
        self.assertEqual(50000, self.database.number_of_rows_in_database())
        stats = self.simulate_typing_file(training_file, verbose=True)
        LOGGER.info('stats=%s', repr(stats))
        # Roughly -24% saved when typing the book with the trained
        # database and the fr_FR dictionary (the exact value is
        # rounded to 0 decimals below):
        self.assertEqual(-24, round(stats['percent'], 0))
class HunspellSuggestTestCase(unittest.TestCase):
    '''Test cases for word suggestions from hunspell dictionaries.

    Fix in this revision: the skip message for test_sv_SE misspelled
    “Swedish” as “Swedisch”.
    '''

    def setUp(self) -> None:
        # Always show the full diff when an assertion fails:
        self.maxDiff = None

    def tearDown(self) -> None:
        pass

    def test_dummy(self) -> None:
        self.assertEqual(True, True)

    @unittest.skipUnless(
        IMPORT_ENCHANT_SUCCESSFUL,
        "Skipping because this test requires python3-enchant to work.")
    @unittest.skipUnless(
        itb_util.get_hunspell_dictionary_wordlist('cs_CZ')[0],
        'Skipping because no Czech hunspell dictionary could be found.')
    @unittest.skipUnless(
        itb_util.get_hunspell_dictionary_wordlist('de_DE')[0],
        'Skipping because no German hunspell dictionary could be found.')
    @unittest.skipUnless(
        testutils.enchant_sanity_test(language='cs_CZ', word='Praha'),
        'Skipping because python3-enchant seems broken for cs_CZ.')
    def test_de_DE_cs_CZ_enchant(self) -> None:
        h = hunspell_suggest.Hunspell(['de_DE', 'cs_CZ'])
        self.assertEqual(
            h.suggest('Geschwindigkeitsubertre')[0],
            ('Geschwindigkeitsu\u0308bertretungsverfahren', 0))
        self.assertEqual(
            h.suggest('Geschwindigkeitsübertretungsverfahren')[0],
            ('Geschwindigkeitsu\u0308bertretungsverfahren', 0))
        self.assertEqual(
            h.suggest('Glühwürmchen')[0],
            ('Glu\u0308hwu\u0308rmchen', 0))
        self.assertEqual(
            h.suggest('Alpengluhen')[0],
            ('Alpenglu\u0308hen', 0))
        self.assertEqual(
            h.suggest('filosofictejsi'),
            [('filosofic\u030Cte\u030Cjs\u030Ci\u0301', 0),
             ('filosofic\u030Cte\u030Cji', -1)])
        self.assertEqual(
            h.suggest('filosofictejs')[0],
            ('filosofic\u030Cte\u030Cjs\u030Ci\u0301', 0))
        self.assertEqual(
            h.suggest('filosofičtější')[0],
            ('filosofic\u030Cte\u030Cjs\u030Ci\u0301', 0))
        self.assertEqual(
            h.suggest('filosofičtějš')[0],
            ('filosofic\u030Cte\u030Cjs\u030Ci\u0301', 0))

    @unittest.skipUnless(
        IMPORT_HUNSPELL_SUCCESSFUL and not IMPORT_ENCHANT_SUCCESSFUL,
        "Skipping because this test requires python3-pyhunspell to work.")
    @unittest.skipUnless(
        itb_util.get_hunspell_dictionary_wordlist('cs_CZ')[0],
        'Skipping because no Czech hunspell dictionary could be found.')
    @unittest.skipUnless(
        itb_util.get_hunspell_dictionary_wordlist('de_DE')[0],
        'Skipping because no German hunspell dictionary could be found.')
    def test_de_DE_cs_CZ_pyhunspell(self) -> None:
        h = hunspell_suggest.Hunspell(['de_DE', 'cs_CZ'])
        self.assertEqual(
            h.suggest('Geschwindigkeitsubertre')[0],
            ('Geschwindigkeitsu\u0308bertretungsverfahren', 0))
        self.assertEqual(
            h.suggest('Geschwindigkeitsübertretungsverfahren')[0],
            ('Geschwindigkeitsu\u0308bertretungsverfahren', 0))
        self.assertEqual(
            h.suggest('Glühwürmchen')[0],
            ('Glu\u0308hwu\u0308rmchen', 0))
        self.assertEqual(
            h.suggest('Alpengluhen')[0],
            ('Alpenglu\u0308hen', 0))
        self.assertEqual(
            h.suggest('filosofictejsi'),
            [('filosofic\u030Cte\u030Cjs\u030Ci\u0301', 0),
             ('filosofie\u0300ti\u0300ji', -1)])
        self.assertEqual(
            h.suggest('filosofictejs')[0],
            ('filosofic\u030Cte\u030Cjs\u030Ci\u0301', 0))
        self.assertEqual(
            h.suggest('filosofičtější')[0],
            ('filosofic\u030Cte\u030Cjs\u030Ci\u0301', 0))
        self.assertEqual(
            h.suggest('filosofičtějš')[0],
            ('filosofic\u030Cte\u030Cjs\u030Ci\u0301', 0))

    @unittest.skipUnless(
        itb_util.get_hunspell_dictionary_wordlist('it_IT')[0],
        'Skipping because no Italian hunspell dictionary could be found.')
    def test_it_IT(self) -> None:
        h = hunspell_suggest.Hunspell(['it_IT'])
        self.assertEqual(
            h.suggest('principianti'),
            [('principianti', 0),
             ('principiati', -1),
             ('principiante', -1),
             ('principiarti', -1),
             ('principiasti', -1)])

    @unittest.skipUnless(
        itb_util.get_hunspell_dictionary_wordlist('es_ES')[0],
        'Skipping because no Spanish hunspell dictionary could be found.')
    def test_es_ES(self) -> None:
        h = hunspell_suggest.Hunspell(['es_ES'])
        self.assertEqual(
            h.suggest('teneis'),
            [('tene\u0301is', 0),
             ('tenes', -1),
             ('tenis', -1),
             ('teneos', -1),
             ('tienes', -1),
             ('te neis', -1),
             ('te-neis', -1)])
        self.assertEqual(
            h.suggest('tenéis')[0], ('tene\u0301is', 0))

    @unittest.skipUnless(
        itb_util.get_hunspell_dictionary_wordlist('en_US')[0],
        'Skipping because no US English hunspell dictionary could be found.')
    @unittest.skipUnless(
        testutils.enchant_sanity_test(language='cs_CZ', word='Praha'),
        'Skipping because python3-enchant seems broken for cs_CZ.')
    @unittest.skipUnless(
        testutils.enchant_working_as_expected(),
        'Skipping because of an unexpected change in the enchant behaviour.')
    def test_en_US(self) -> None:
        h = hunspell_suggest.Hunspell(['en_US'])
        self.assertEqual(
            [('Camel', 0), ('camel', 0), ('Camelot', 0), ('camellia', 0),
             ('camelhair', 0), ('Camelopardalis', 0), ('CAM', -1),
             ('Cal', -1), ('Mel', -1), ('cal', -1), ('cam', -1),
             ('Carl', -1), ('Gael', -1), ('Jame', -1), ('call', -1),
             ('came', -1), ('come', -1), ('game', -1), ('Jamal', -1),
             ('Jamel', -1), ('Ocaml', -1), ('cable', -1), ('cameo', -1),
             ('calmer', -1), ('camels', -1), ('comely', -1),
             ('compel', -1), ('gamely', -1), ("Camel's", -1),
             ('Camilla', -1), ('Camille', -1), ('Carmela', -1),
             ('Carmelo', -1), ("Jamel's", -1), ("camel's", -1),
             ('caramel', -1), ('Carmella', -1)],
            h.suggest('camel'))

    @unittest.skipUnless(
        itb_util.get_hunspell_dictionary_wordlist('fr_FR')[0],
        'Skipping because no French hunspell dictionary could be found.')
    def test_fr_FR(self) -> None:
        h = hunspell_suggest.Hunspell(['fr_FR'])
        self.assertEqual(
            h.suggest('differemmen'),
            [('diffe\u0301remment', 0)])

    @unittest.skipUnless(
        itb_util.get_hunspell_dictionary_wordlist('el_GR')[0],
        "Skipping because no Greek dictionary could be found. ")
    def test_el_GR(self) -> None:
        h = hunspell_suggest.Hunspell(['el_GR'])
        self.assertEqual(
            h.suggest('αλφαβητο')[0], ('αλφάβητο', 0))

    def test_fi_FI_dictionary_file(self) -> None:
        # The fi_FI dictionary file is included in ibus-typing-booster.
        # This should work with and without voikko.
        h = hunspell_suggest.Hunspell(['fi_FI'])
        self.assertEqual(
            h.suggest('kissa'),
            [('kissa', 0),
             ('kissaa', 0),
             ('kissani', 0),
             ('kissassa', 0),
             ('kissajuttu', 0),
             ('kissamaiseksi', 0)])

    # NOTE(review): comparing version strings lexicographically
    # ('>= "4.3"') misorders versions like '10.0' — confirm the format
    # returned by testutils.get_libvoikko_version() before changing.
    @unittest.skipUnless(
        testutils.get_libvoikko_version() >= '4.3',
        "Skipping, requires python3-libvoikko version >= 4.3.")
    @unittest.skipIf(
        testutils.init_libvoikko_error(),
        "Skipping, %s" % testutils.init_libvoikko_error())
    def test_fi_FI_voikko(self) -> None:
        d = hunspell_suggest.Dictionary('fi_FI')
        self.assertEqual(d.has_spellchecking(), True)
        h = hunspell_suggest.Hunspell(['fi_FI'])
        self.assertEqual(
            h.suggest('kisssa'),
            [('kissa', -1),
             ('Kiassa', -1),
             ('kissaa', -1),
             ('kisassa', -1),
             ('kisussa', -1)])
        self.assertEqual(
            h.suggest('Pariisin-suurlähettila'),
            [('Pariisin-suurla\u0308hettila\u0308s', 0),
             ('Pariisin-suurlähetetila', -1),
             ('Pariisin-suurlähettiala', -1)])

    @unittest.skipUnless(
        IMPORT_ENCHANT_SUCCESSFUL,
        "Skipping because this test requires python3-enchant to work.")
    @unittest.skipUnless(
        itb_util.get_hunspell_dictionary_wordlist('en_US')[0],
        'Skipping because no US English hunspell dictionary could be found.')
    def test_en_US_spellcheck_enchant(self) -> None:
        d = hunspell_suggest.Dictionary('en_US')
        self.assertEqual(d.spellcheck_enchant('winter'), True)
        self.assertEqual(d.spellcheck_enchant('winxer'), False)

    @unittest.skipUnless(
        IMPORT_ENCHANT_SUCCESSFUL,
        "Skipping because this test requires python3-enchant to work.")
    @unittest.skipUnless(
        itb_util.get_hunspell_dictionary_wordlist('en_US')[0],
        'Skipping because no US English hunspell dictionary could be found.')
    @unittest.skipUnless(
        testutils.enchant_sanity_test(language='cs_CZ', word='Praha'),
        'Skipping because python3-enchant seems broken for cs_CZ.')
    @unittest.skipUnless(
        testutils.enchant_working_as_expected(),
        'Skipping because of an unexpected change in the enchant behaviour.')
    def test_en_US_spellcheck_suggest_enchant(self) -> None:
        d = hunspell_suggest.Dictionary('en_US')
        self.assertEqual(
            d.spellcheck_suggest_enchant('kamel'),
            ['Jamel', 'Camel', 'camel', 'Jamal', 'gamely'])

    @unittest.skipUnless(
        IMPORT_HUNSPELL_SUCCESSFUL and not IMPORT_ENCHANT_SUCCESSFUL,
        "Skipping because this test requires python3-pyhunspell to work.")
    @unittest.skipUnless(
        itb_util.get_hunspell_dictionary_wordlist('en_US')[0],
        'Skipping because no US English hunspell dictionary could be found.')
    def test_en_US_spellcheck_pyhunspell(self) -> None:
        d = hunspell_suggest.Dictionary('en_US')
        self.assertEqual(d.spellcheck_pyhunspell('winter'), True)
        self.assertEqual(d.spellcheck_pyhunspell('winxer'), False)

    @unittest.skipUnless(
        IMPORT_HUNSPELL_SUCCESSFUL and not IMPORT_ENCHANT_SUCCESSFUL,
        "Skipping because this test requires python3-pyhunspell to work.")
    @unittest.skipUnless(
        itb_util.get_hunspell_dictionary_wordlist('en_US')[0],
        'Skipping because no US English hunspell dictionary could be found.')
    def test_en_US_spellcheck_suggest_pyhunspell(self) -> None:
        d = hunspell_suggest.Dictionary('en_US')
        self.assertEqual(
            d.spellcheck_suggest_pyhunspell('kamel'),
            ['camel', 'Camel'])

    @unittest.skipUnless(
        testutils.get_libvoikko_version() >= '4.3',
        "Skipping, requires python3-libvoikko version >= 4.3.")
    @unittest.skipIf(
        testutils.init_libvoikko_error(),
        "Skipping, %s" % testutils.init_libvoikko_error())
    def test_fi_FI_spellcheck_voikko(self) -> None:
        d = hunspell_suggest.Dictionary('fi_FI')
        self.assertEqual(d.spellcheck_voikko('kissa'), True)
        self.assertEqual(d.spellcheck_voikko('kisssa'), False)
        self.assertEqual(d.spellcheck_voikko('Päiviä'), True)
        self.assertEqual(d.spellcheck_voikko('Päivia'), False)

    @unittest.skipUnless(
        testutils.get_libvoikko_version() >= '4.3',
        "Skipping, requires python3-libvoikko version >= 4.3.")
    @unittest.skipIf(
        testutils.init_libvoikko_error(),
        "Skipping, %s" % testutils.init_libvoikko_error())
    def test_fi_FI_spellcheck_suggest_voikko(self) -> None:
        d = hunspell_suggest.Dictionary('fi_FI')
        self.assertEqual(
            d.spellcheck_suggest_voikko('kisssa'),
            ['kissa', 'kissaa', 'kisassa', 'kisussa', 'Kiassa'])

    @unittest.skipUnless(
        itb_util.get_hunspell_dictionary_wordlist('sv_SE')[0],
        # Typo fixed: “Swedisch” -> “Swedish”:
        "Skipping because no Swedish dictionary could be found. ")
    def test_sv_SE(self) -> None:
        h = hunspell_suggest.Hunspell(['sv_SE'])
        self.assertEqual(
            h.suggest('östgo'),
            [('östgot', 0),
             ('Östgöta', 0),
             ('östgöte', 0),
             ('östgotisk', 0),
             ('östgötsk', 0),
             ('östgötska', 0)])
        self.assertEqual(
            h.suggest('östgot'),
            [('östgot', 0),
             ('östgotisk', 0),
             ('Östgot', -1)])
        self.assertEqual(
            h.suggest('östgö'),
            [('Östgöta', 0),
             ('östgöte', 0),
             ('östgötsk', 0),
             ('östgötska', 0)])
        self.assertEqual(
            h.suggest('östgöt')[0:5],
            [('Östgöta', 0),
             ('östgöte', 0),
             ('östgötsk', 0),
             ('östgötska', 0),
             ('östgot', -1)])