class Mock_thai_name():
    """Build a DataFrame of randomly sampled Thai given and family names.

    The candidate pools below are read from the ``pc`` corpus helpers once,
    when the class body is executed, and are shared by every instance.
    """

    # random.sample needs a sequence, so each corpus is copied into a list.
    female_names = list(pc.thai_female_names())
    male_names = list(pc.thai_male_names())
    family_names = list(pc.thai_family_names())

    def __init__(self,
                 female_n: int = 1,
                 male_n: int = 1,
                 output_csv_filename=None):
        """Randomly pick Thai names for a given number of women and men.

        The result is stored in ``self.df`` with the columns ``fname``,
        ``lname`` and ``gender`` ('f' or 'm'); female rows come first.

        Args:
            female_n (int, optional): number of female names. Defaults to 1.
            male_n (int, optional): number of male names. Defaults to 1.
            output_csv_filename (optional): when truthy, the result is also
                saved as a CSV file at this location. Defaults to None.

        Raises:
            ValueError: raised by ``random.sample`` when a requested count
                is negative or larger than the pool it is drawn from.
        """
        frames = []
        # Females are processed before males, and given names are drawn
        # before family names, so a seeded ``random`` yields a stable result.
        for gender_code, given_pool, count in (
                ('f', self.female_names, female_n),
                ('m', self.male_names, male_n),
        ):
            given = random.sample(given_pool, count)
            family = random.sample(self.family_names, count)
            frames.append(pd.DataFrame({
                'fname': given,
                'lname': family,
                'gender': gender_code,  # scalar, repeated for every row
            }))

        # Each frame keeps its own 0-based index; concat does not renumber.
        self.df = pd.concat(frames)

        if output_csv_filename:
            self.df.to_csv(output_csv_filename, index=False)
def test_corpus(self):
    """Check the built-in corpus loaders and the download/remove cycle."""
    # Every built-in loader is expected to return a frozenset.
    frozenset_loaders = (
        thai_negations,
        thai_stopwords,
        thai_syllables,
        thai_words,
        countries,
        provinces,
        thai_female_names,
        thai_male_names,
    )
    for load in frozenset_loaders:
        self.assertIsInstance(load(), frozenset)

    # corpus does not exist
    self.assertEqual(get_corpus_db_detail("XXX"), {})

    # The steps below depend on each other and must run in this order.
    self.assertTrue(download("test"))  # download the first time
    self.assertTrue(download(name="test", force=True))  # force download
    self.assertTrue(download(name="test"))  # try download existing
    # URL not exist
    self.assertFalse(download(name="test", url="wrongurl"))
    # corpus name not exist
    self.assertFalse(download(name="XxxXXxxx817d37sf"))
    self.assertIsNotNone(get_corpus_db_detail("test"))  # corpus exists
    self.assertTrue(remove("test"))  # remove existing
    self.assertFalse(remove("test"))  # remove non-existing

    # Fetch an explicit version, then clean up again.
    self.assertTrue(download(name="test", version="0.1"))
    self.assertTrue(remove("test"))
def test_corpus(self):
    """Check corpus loaders, catalog queries and versioned downloads."""
    # Loaders that are expected to return a frozenset.
    for load in (
            thai_negations,
            thai_stopwords,
            thai_syllables,
            thai_words,
            countries,
            provinces,
    ):
        self.assertIsInstance(load(), frozenset)

    # The detailed province listing is a list with one entry per province.
    self.assertIsInstance(provinces(details=True), list)
    self.assertEqual(
        len(provinces(details=False)),
        len(provinces(details=True)),
    )

    self.assertIsInstance(thai_family_names(), frozenset)
    self.assertIsInstance(list(thai_family_names())[0], str)
    for load in (thai_female_names, thai_male_names):
        self.assertIsInstance(load(), frozenset)

    # URL does not exist, should get 404 response
    missing_db = get_corpus_db(
        "https://example.com/XXXXXX0lkjasd/SXfmskdjKKXXX")
    self.assertIsInstance(missing_db, Response)
    self.assertIsNone(get_corpus_db("XXXlkja3sfdXX"))  # Invalid URL

    # corpus does not exist
    self.assertEqual(get_corpus_db_detail("XXXmx3KSXX"), {})
    self.assertEqual(get_corpus_db_detail("XXXmx3KSXX", version="0.2"), {})

    # The steps below depend on each other and must run in this order.
    self.assertTrue(download("test"))  # download the first time
    self.assertTrue(download(name="test", force=True))  # force download
    self.assertTrue(download(name="test"))  # try download existing
    # URL not exist
    self.assertFalse(download(name="test", url="wrongurl"))
    # corpus name not exist
    self.assertFalse(download(name="XxxXXxxx817d37sf"))
    self.assertIsNotNone(get_corpus_db_detail("test"))  # corpus exists
    self.assertIsNotNone(get_corpus_path("test"))  # corpus exists
    self.assertTrue(remove("test"))  # remove existing
    self.assertFalse(remove("test"))  # remove non-existing
    self.assertIsNone(get_corpus_path("XXXkdjfBzc"))  # query non-existing

    # Versions for which download() is expected to report failure.
    for rejected in ("0.0", "0.0.0", "0.0.1", "0.0.2", "0.0.3", "0.0.4"):
        self.assertFalse(download(name="test", version=rejected))

    self.assertIsNotNone(download(name="test", version="0.0.5"))
    self.assertTrue(download("test"))
    self.assertIsNotNone(remove("test"))  # remove existing

    # Versions for which download() only has to return something.
    for version in ("0.0.6", "0.0.7", "0.0.8", "0.0.9", "0.0.10"):
        self.assertIsNotNone(download(name="test", version=version))

    # This version is expected to fail the hash check with an exception.
    with self.assertRaises(Exception) as context:
        self.assertIsNotNone(download(name="test", version="0.0.11"))
    error_text = str(context.exception)
    self.assertTrue("Hash does not match expected." in error_text)

    self.assertIsNotNone(download(name="test", version="0.1"))
    self.assertIsNotNone(remove("test"))
def test_corpus(self):
    """Smoke-test the corpus loaders and a download/remove round trip."""
    # Each built-in loader only has to return something other than None.
    for load in (
            countries,
            provinces,
            thai_negations,
            thai_stopwords,
            thai_syllables,
            thai_words,
            thai_female_names,
            thai_male_names,
    ):
        self.assertIsNotNone(load())

    # An unknown corpus name yields an empty detail record.
    self.assertEqual(get_corpus_db_detail("XXX"), {})

    # Here download() is expected to return None, forced or not.
    self.assertIsNone(download("test"))
    self.assertIsNone(download("test", force=True))
    self.assertIsNotNone(get_corpus_db_detail("test"))

    # The first removal returns a non-None value; the second is falsy.
    self.assertIsNotNone(remove("test"))
    self.assertFalse(remove("test"))
# -*- coding: utf-8 -*-
import random
from pythainlp.corpus import thai_female_names, thai_male_names, thai_words
from faker import Faker

fake = Faker()

# Female names followed by male names, in one pool for random.choice.
list_name = [*thai_female_names(), *thai_male_names()]

# Domain suffixes under .th, plus the Thai-script ".ไทย" suffix.
list_domain_thai = [
    ".go.th",
    ".co.th",
    ".or.th",
    ".in.th",
    ".ac.th",
    ".net.th",
    ".mi.th",
    ".ไทย",
]

# Thai words from the corpus, skipping any entry that contains a space.
list_thai_word = [word for word in thai_words() if ' ' not in word]


def gen_name(full_name: bool = False) -> str:
    """Return a random name drawn from ``list_name``.

    :param bool full_name: when true, a second random entry from
        ``list_name`` is appended after a single space
    :return: one name, or two names separated by a space
    :rtype: str
    """
    first = random.choice(list_name)
    if not full_name:
        return first
    return first + " " + random.choice(list_name)


def gen_thai_phone_number(mobile: bool = True) -> str:
    """Return a random digit string shaped like a Thai phone number.

    The string is "0", then one digit from 1-9, then 8 more random digits
    when ``mobile`` is true (10 digits in total) or 7 otherwise (9 digits).

    :param bool mobile: choose the 10-digit form instead of the 9-digit one
    :return: the generated digits, without separators
    :rtype: str
    """
    trailing_count = 8 if mobile else 7
    digits = ["0", str(random.randint(1, 9))]
    digits.extend(str(random.randint(0, 9)) for _ in range(trailing_count))
    return "".join(digits)