def test_path(self):
    """Check the path helpers.

    The full data path resolved for a filename must end with that
    filename, and both base-path getters must return ``str`` values.
    """
    corpus_file = "ttc_freq.txt"

    # Resolve first, then assert on the result, so the failing value is
    # easy to inspect in a debugger.
    resolved = get_full_data_path(corpus_file)
    self.assertTrue(resolved.endswith(corpus_file))

    # Both base-path getters are expected to hand back plain strings.
    self.assertIsInstance(get_pythainlp_data_path(), str)
    self.assertIsInstance(get_pythainlp_path(), str)
"thai_negations", "thai_stopwords", "thai_syllables", "thai_words", "path_pythainlp_corpus", ] import os from pythainlp.tools import get_full_data_path, get_pythainlp_path from tinydb import TinyDB # Remote and local corpus databases _CORPUS_DIRNAME = "corpus" _CORPUS_PATH = os.path.join(get_pythainlp_path(), _CORPUS_DIRNAME) # remote corpus catalog URL _CORPUS_DB_URL = ("https://pythainlp.github.io/pythainlp-corpus/db.json") # local corpus catalog filename _CORPUS_DB_FILENAME = "db.json" # local corpus catalog full path _CORPUS_DB_PATH = get_full_data_path(_CORPUS_DB_FILENAME) # create a local corpus database if it does not already exist if not os.path.exists(_CORPUS_DB_PATH): TinyDB(_CORPUS_DB_PATH).close()
# -*- coding: utf-8 -*- import os from urllib.request import urlopen import requests from pythainlp.tools import get_full_data_path, get_pythainlp_path from tinydb import Query, TinyDB from tqdm import tqdm # Remote and local corpus databases _CORPUS_DIRNAME = "corpus" _CORPUS_PATH = os.path.join(get_pythainlp_path(), _CORPUS_DIRNAME) _CORPUS_DB_URL = ( "https://raw.githubusercontent.com/PyThaiNLP/pythainlp-corpus/2.0/db.json" ) _CORPUS_DB_FILENAME = "db.json" _CORPUS_DB_PATH = get_full_data_path(_CORPUS_DB_FILENAME) if not os.path.exists(_CORPUS_DB_PATH): TinyDB(_CORPUS_DB_PATH) def corpus_path(): return _CORPUS_PATH def corpus_db_url():