def load_mrd(dir):
    """Create, load and post-process an ``MrdDataSource`` for *dir*.

    The data source is constructed from the paths ``morphs.utf8.mrd`` and
    ``gramtab.utf8.mrd`` inside *dir* (with ``strip_EE=True``), then its
    ``load()`` and ``calculate_rule_freq()`` methods are invoked in that
    order.  A progress message is printed before each of the two stages.

    NOTE: the parameter name shadows the builtin ``dir()``; it is kept
    unchanged so existing callers keep working.

    :param dir: directory holding the two ``*.utf8.mrd`` files.
    :return: the ``MrdDataSource`` instance after both stages have run.
    """
    print('parsing source dictionary file...')
    morphs_path = os.path.join(dir, 'morphs.utf8.mrd')
    gramtab_path = os.path.join(dir, 'gramtab.utf8.mrd')
    source = MrdDataSource(morphs_path, gramtab_path, strip_EE=True)
    source.load()

    print('calculating rule frequencies...')
    source.calculate_rule_freq()

    return source
coding, 'utf8') convert_file(os.path.join(src_dir, 'Morph', gramtab), os.path.join(dest_dir, lang, 'gramtab.utf8.mrd'), coding, 'utf8') def cleanup_after_convert(dir): print('cleaning up...') os.unlink(os.path.join(dir, 'morphs.utf8.mrd')) os.unlink(os.path.join(dir, 'gramtab.utf8.mrd')) print("========") if __name__ == '__main__': MrdDataSource.setup_psyco() src_dir = 'dicts/src/Dicts' dest_dir = 'dicts/converted' LANGUAGES = 'en', 'ru' FORMATS = 'cdb', 'sqlite', # 'tinycdb', 'cdblib', # 'shelve', for lang in LANGUAGES: convert_dicts(src_dir, dest_dir, lang) dest = os.path.join(dest_dir, lang) mrd = load_mrd(dest) make_pickled(dest, mrd) for fmt in FORMATS: