def test_pickle_compressed(self):
    """Round-trip a SymSpell instance through save_pickle / load_pickle.

    Loads the test dictionary, pickles it to a scratch file under
    ``self.fortests_path``, loads that file into a second instance and
    checks that ``deletes``, ``words`` and ``_max_length`` are equal on
    both instances.

    NOTE(review): the test name suggests this exercises the compressed
    pickle format via the default arguments of save_pickle/load_pickle --
    confirm against the SymSpell API.
    """
    pickle_path = os.path.join(self.fortests_path, "dictionary.pickle")
    edit_distance_max = 2
    prefix_length = 7

    sym_spell = SymSpell(edit_distance_max, prefix_length)
    sym_spell.load_dictionary(self.dictionary_path, 0, 1)

    try:
        sym_spell.save_pickle(pickle_path)

        sym_spell_2 = SymSpell(edit_distance_max, prefix_length)
        sym_spell_2.load_pickle(pickle_path)

        self.assertEqual(sym_spell.deletes, sym_spell_2.deletes)
        self.assertEqual(sym_spell.words, sym_spell_2.words)
        self.assertEqual(sym_spell._max_length, sym_spell_2._max_length)
    finally:
        # Always remove the scratch file, even when an assertion or the
        # load fails, so a failing run does not leave stale fixtures
        # behind. Guard on existence so a save_pickle failure is not masked
        # by a FileNotFoundError from the cleanup itself.
        if os.path.exists(pickle_path):
            os.remove(pickle_path)
def init_sym_spell():
    """Return a SymSpell instance, loading it from cache or building it.

    If ``words.pkl.gz`` exists in the ``music_manager`` user cache directory,
    the instance is populated from that pickle.  Otherwise the symspellpy
    ``frequency_dictionary_en_82_765.txt`` dictionary is loaded, every entry
    of the ``etc/scowl/words.xz`` word list that is not already present is
    added with the lowest count found in the loaded dictionary, and the
    result is pickled to the cache path for subsequent calls.

    Returns:
        The populated ``SymSpell`` instance (created with
        ``max_dictionary_edit_distance=0`` and ``prefix_length=1``).

    Raises:
        ValueError: If the base frequency dictionary yielded no words.
    """
    from pathlib import Path
    from symspellpy import SymSpell
    from ds_tools.fs.paths import get_user_cache_dir

    sym_spell = SymSpell(max_dictionary_edit_distance=0, prefix_length=1)
    dict_path_pkl = Path(
        get_user_cache_dir('music_manager')).joinpath('words.pkl.gz')

    if dict_path_pkl.exists():
        log.debug(f'Loading pickled spellcheck dictionary: {dict_path_pkl}')
        # NOTE(review): presumably this unpickles the cache file; that is only
        # safe while the cache directory is writable by trusted users alone.
        sym_spell.load_pickle(dict_path_pkl)
        return sym_spell

    # Only needed on the (one-time) build path, so import lazily.
    import lzma
    import pkg_resources

    dict_path = pkg_resources.resource_filename(
        'symspellpy', 'frequency_dictionary_en_82_765.txt')
    sym_spell.load_dictionary(dict_path, 0, 1)

    loaded = sym_spell._words
    if not loaded:
        # Without this guard min() below fails with an opaque
        # "min() arg is an empty sequence"; keep the exception type but say
        # what actually went wrong.
        raise ValueError(
            f'No words were loaded from the base dictionary: {dict_path}')

    word_list_path_xz = Path(
        pkg_resources.resource_filename(
            'music', '../../etc/scowl/words.xz')).resolve()
    log.debug(
        f'Loading default dictionary + word list from {word_list_path_xz}')
    with lzma.open(word_list_path_xz, 'rt', encoding='utf-8') as f:
        word_list = f.read().splitlines()

    # Extra words get the lowest count already present in the dictionary.
    min_count = min(loaded.values())
    add_word = sym_spell.create_dictionary_entry
    for word in word_list:
        # Membership test instead of try/except KeyError: most list entries
        # may be missing, and raising per word is needlessly expensive.
        if word not in loaded:
            add_word(word, min_count)

    log.info(
        'Saving pickled spellcheck dictionary (this is a one-time action '
        f'that may take about 15 seconds): {dict_path_pkl}')
    sym_spell.save_pickle(dict_path_pkl)
    return sym_spell