예제 #1
0
 def test_path(self):
     """Check that the path helpers return usable string paths."""
     filename = "ttc_freq.txt"

     # The resolved full path must end with the file name that was asked for.
     full_path = get_full_data_path(filename)
     self.assertTrue(full_path.endswith(filename))

     # Both directory getters are expected to return plain strings.
     data_dir = get_pythainlp_data_path()
     self.assertIsInstance(data_dir, str)
     package_dir = get_pythainlp_path()
     self.assertIsInstance(package_dir, str)
예제 #2
0
    "thai_negations",
    "thai_stopwords",
    "thai_syllables",
    "thai_words",
    "path_pythainlp_corpus",
]

import os

from pythainlp.tools import get_full_data_path, get_pythainlp_path
from tinydb import TinyDB

# Locations of the remote and local corpus catalogs

# corpus directory, placed under the path returned by get_pythainlp_path()
_CORPUS_DIRNAME = "corpus"
_CORPUS_PATH = os.path.join(get_pythainlp_path(), _CORPUS_DIRNAME)

# URL of the remote corpus catalog
_CORPUS_DB_URL = "https://pythainlp.github.io/pythainlp-corpus/db.json"

# file name of the local corpus catalog, and its resolved full path
_CORPUS_DB_FILENAME = "db.json"
_CORPUS_DB_PATH = get_full_data_path(_CORPUS_DB_FILENAME)

# When no local catalog is present, open one with TinyDB and close it
# straight away; the handle is only needed to bring the file into existence.
if not os.path.exists(_CORPUS_DB_PATH):
    TinyDB(_CORPUS_DB_PATH).close()

예제 #3
0
# -*- coding: utf-8 -*-

import os
from urllib.request import urlopen

import requests
from pythainlp.tools import get_full_data_path, get_pythainlp_path
from tinydb import Query, TinyDB
from tqdm import tqdm

# Remote and local corpus databases

# corpus directory, located under the path returned by get_pythainlp_path()
_CORPUS_DIRNAME = "corpus"
_CORPUS_PATH = os.path.join(get_pythainlp_path(), _CORPUS_DIRNAME)

# remote corpus catalog URL
_CORPUS_DB_URL = (
    "https://raw.githubusercontent.com/PyThaiNLP/pythainlp-corpus/2.0/db.json"
)

# local corpus catalog filename and its full path
_CORPUS_DB_FILENAME = "db.json"
_CORPUS_DB_PATH = get_full_data_path(_CORPUS_DB_FILENAME)

# Create a local corpus database if it does not already exist. The TinyDB
# instance is needed only for that side effect, so close it immediately
# instead of leaving its storage file handle open for the life of the process.
if not os.path.exists(_CORPUS_DB_PATH):
    TinyDB(_CORPUS_DB_PATH).close()


def corpus_path():
    """Return the corpus directory path stored in ``_CORPUS_PATH``."""
    return _CORPUS_PATH


def corpus_db_url():