예제 #1
0
# XXX: Bad workaround for lack of data_files in poetry

import os
from urllib.request import urlretrieve

from lextract.data import get_data_path

# Work from the directory returned by get_data_path() so the bare file names
# below are resolved (and the downloads are stored) relative to it.
os.chdir(get_data_path())
for fn in ["t2s_char.json", "wn-data-cmn.diff"]:
    # Skip files that have already been fetched.
    if os.path.exists(fn):
        continue
    # Download under a temporary name and move it into place only once the
    # transfer has finished: an interrupted download must not leave a
    # truncated file that the existence check above would accept next time.
    partial_fn = fn + ".part"
    urlretrieve(
        "https://raw.githubusercontent.com/frankier/STIFF/62773e76ded69c35ba7cc15e7687091b40448951/stiff/data/"
        + fn,
        partial_fn,
    )
    os.replace(partial_fn, fn)
예제 #2
0
파일: fixes.py 프로젝트: frankier/lextract
def fix_cmn():
    """Register the lemmas from ``wn-data-cmn-fixed.tab`` under language ``cmn``.

    The tab file is located via ``get_data_path`` and handed to
    ``wordnet.custom_lemmas``.
    """
    # Use a context manager so the file handle is closed once the lemmas have
    # been read, instead of being left open until garbage collection.
    with open(get_data_path("wn-data-cmn-fixed.tab")) as tab_file:
        wordnet.custom_lemmas(tab_file, lang="cmn")
예제 #3
0
파일: fixes.py 프로젝트: frankier/lextract
def add_omw_wikt():
    """Register the ``wn-wikt-<lang>.tab`` lemma files with WordNet.

    For each ``(lang, code)`` pair, the file ``wn-wikt-<lang>.tab`` (located
    via ``get_data_path``) is passed to ``wordnet.custom_lemmas`` under the
    language code ``code``: ``fin`` is registered as ``qwf`` and ``cmn`` as
    ``qwc``.
    """
    for lang, code in [("fin", "qwf"), ("cmn", "qwc")]:
        tab_path = get_data_path("wn-wikt-{}.tab".format(lang))
        # Close each file as soon as it has been consumed rather than leaking
        # one open handle per language.
        with open(tab_path) as tab_file:
            wordnet.custom_lemmas(tab_file, lang=code)
예제 #4
0
파일: opencc.py 프로젝트: frankier/lextract
def get_opencc():
    """Return the module-wide ``opencc.OpenCC`` instance, creating it on first use.

    The instance is built from the ``t2s_char.json`` config located via
    ``get_data_path`` and cached in the module-level ``_opencc`` variable, so
    later calls return the same object.
    """
    global _opencc
    # Fast path: the converter has already been constructed.
    if _opencc is not None:
        return _opencc
    _opencc = opencc.OpenCC(get_data_path("t2s_char.json"))
    return _opencc