# XXX: Bad workaround for lack of data_files in poetry
#
# Fetches the extra data files into the package data directory when they are
# not already present there.
import os
from urllib.request import urlretrieve

from lextract.data import get_data_path

# Download location; the URL embeds a fixed commit hash of the STIFF repo.
_STIFF_DATA_URL = (
    "https://raw.githubusercontent.com/frankier/STIFF/"
    "62773e76ded69c35ba7cc15e7687091b40448951/stiff/data/"
)

# File names are resolved relative to the data directory from here on.
os.chdir(get_data_path())

for fn in ["t2s_char.json", "wn-data-cmn.diff"]:
    if os.path.exists(fn):
        continue
    # Download under a temporary name and move it into place only after the
    # transfer finished. Otherwise an interrupted download would leave a
    # truncated file that the existence check above accepts on later runs.
    partial_fn = fn + ".part"
    try:
        urlretrieve(_STIFF_DATA_URL + fn, partial_fn)
        # Same directory, so the rename replaces the target in one step.
        os.replace(partial_fn, fn)
    finally:
        # Only still present if the download or the rename failed.
        if os.path.exists(partial_fn):
            os.remove(partial_fn)
def fix_cmn():
    """Register the lemmas from ``wn-data-cmn-fixed.tab`` under ``"cmn"``.

    Opens the tab file found via ``get_data_path`` and passes it to
    ``wordnet.custom_lemmas`` with ``lang="cmn"``.
    """
    # NOTE(review): assumes `wordnet` is NLTK's WordNet reader, whose
    # custom_lemmas() reads the whole file during the call, so closing the
    # handle right afterwards is safe -- confirm.
    # The tab data is presumably UTF-8 (NLTK decodes byte input as UTF-8);
    # open it explicitly as such instead of using the locale default.
    with open(get_data_path("wn-data-cmn-fixed.tab"), encoding="utf-8") as tab_file:
        wordnet.custom_lemmas(tab_file, lang="cmn")
def add_omw_wikt():
    """Register the ``wn-wikt-<lang>.tab`` lemma files with ``wordnet``.

    For each ``(lang, code)`` pair, the file ``wn-wikt-<lang>.tab`` from the
    data directory is passed to ``wordnet.custom_lemmas`` under ``code``:
    ``fin`` is registered as ``qwf`` and ``cmn`` as ``qwc``.
    """
    for lang, code in [("fin", "qwf"), ("cmn", "qwc")]:
        tab_path = get_data_path("wn-wikt-{}.tab".format(lang))
        # NOTE(review): assumes `wordnet` is NLTK's WordNet reader, whose
        # custom_lemmas() reads the whole file during the call, so closing
        # the handle right afterwards is safe -- confirm.
        # The tab data is presumably UTF-8; open it explicitly as such
        # instead of using the locale default.
        with open(tab_path, encoding="utf-8") as tab_file:
            wordnet.custom_lemmas(tab_file, lang=code)
def get_opencc():
    """Return the shared ``opencc.OpenCC`` instance, creating it on first use.

    The instance is stored in the module-level ``_opencc`` variable. While
    that variable is ``None``, a new converter is built from the
    ``t2s_char.json`` config in the data directory and stored there.
    """
    global _opencc
    # Fast path: a converter was already created by an earlier call.
    if _opencc is not None:
        return _opencc
    _opencc = opencc.OpenCC(get_data_path("t2s_char.json"))
    return _opencc