def spell_checker(request):
    """Django view: render the spell-checker form and, on POST, the correction.

    GET (or any non-POST method) renders a blank ``TextForm``.
    POST binds the submitted data, and when the form validates, runs the
    input text through a ``NorvigSpellChecker`` built from a pickled Thai
    dictionary and renders the top-ranked suggestion back to the template.

    :param request: the Django ``HttpRequest``
    :return: an ``HttpResponse`` rendering ``spell_checker_form.html``
    """
    text = ""
    if request.method == 'POST':
        # Bind the submitted data to the form so it can be validated.
        form = TextForm(request.POST)
        if form.is_valid():
            input_text = form.cleaned_data['InputText']
            # Use a context manager so the dictionary file is always closed
            # (the original handle was opened and never closed — a leak).
            # NOTE(review): pickle.load on a file from disk is trusted here;
            # never point this at untrusted data.
            with open(
                "/root/geo_spellchecker/django_server/spell_checker/dataset/dict.pkl",
                "rb",
            ) as dict_file:
                geo_dictlist = pickle.load(dict_file)
            geo_spell_checker = NorvigSpellChecker(
                custom_dict=geo_dictlist, min_freq=5
            )
            # spell() returns candidates ranked by probability; keep the best.
            allword_prob = geo_spell_checker.spell(input_text)
            predict_word = allword_prob[0]
            print("Input Text: " + input_text)
            print("Result Text: " + predict_word)
            text = predict_word
            # Re-render the form together with the corrected text.
            return render(
                request,
                'spell_checker_form.html',
                {'form': form, 'text': text},
            )
    # GET (or any other method): present a blank form.
    else:
        form = TextForm()
    return render(
        request,
        'spell_checker_form.html',
        {'form': form, 'text': text},
    )
def test_spell(self):
    """spell()/correct() return "" on falsy input and something on real words."""
    # Falsy inputs must come back as the empty string.
    for bad_input in (None, ""):
        self.assertEqual(spell(bad_input), "")
    # Misspelled Thai words still yield a (non-None) suggestion list.
    for misspelled in ("เน้ร", "เกสมร์"):
        self.assertIsNotNone(spell(misspelled))
    for bad_input in (None, ""):
        self.assertEqual(correct(bad_input), "")
    self.assertIsNotNone(correct("ทดสอง"))
    # An empty dict_filter must still produce a usable checker.
    sp_checker = NorvigSpellChecker(dict_filter="")
    self.assertIsNotNone(sp_checker.dictionary())
    self.assertGreaterEqual(sp_checker.prob("มี"), 0)
def test_norvig_spell_checker(self):
    """Default dictionary is populated; custom_dict filters keep only valid entries."""
    default_checker = NorvigSpellChecker(dict_filter=None)
    self.assertGreater(len(default_checker.dictionary()), 0)
    self.assertGreaterEqual(default_checker.prob("มี"), 0)
    # Every entry except ("การ", 42) violates one filter rule.
    freq_pairs = [
        ("การงาน", 31),  # longer than max_len
        ("กาม", 1),      # fewer than min_freq
        ("กาล0", 64),    # has digit
        ("๒๔๗๕", 64),    # has digit
        ("hello", 8),    # not Thai
        ("ลบ", -1),      # negative count
        ("การ", 42),     # OK
    ]
    filtered_checker = NorvigSpellChecker(
        custom_dict=freq_pairs, min_freq=2, max_len=5
    )
    self.assertEqual(len(filtered_checker.dictionary()), 1)
def Tokenize_word(self,text):
    """Tokenize text[0] into spell-corrected words.

    Segments the sentence with deepcut, drops Thai and English stopwords,
    then spell-corrects each remaining token: English tokens via
    SpellChecker, Thai tokens via NorvigSpellChecker (TNC frequencies);
    mixed alpha-numeric tokens are kept as-is and pure numbers are dropped.
    Returns the cleaned, non-empty corrected words as a list.

    NOTE(review): assumes ``text`` is a sequence whose first element is the
    sentence string (only ``text[0]`` is read) — confirm against callers.
    """
    ######## Thai word segment ######## ver1
    '''sent = text[0].replace("'","")
    word = word_tokenize(sent, engine='deepcut') # use this method
    wword = [x.replace('.',' ').replace('=',' ').replace('-',' ').replace("("," ").replace(")"," ").replace("/"," ").replace('สำหรับ',' ').replace('%',' ').strip(' ') for x in word]
    words =[]
    for w in wword:
        if w not in common.thai_stopwords():
            words = [str for str in words if str]
            words.append(w)
    return words'''
    ######## Thai word segment ######## ver2 -> stopwords, type of words, check spell(Eng & Thai)
    sent = text[0].replace("'","")
    # Segment the sentence into tokens with the deepcut engine.
    word = word_tokenize(sent, engine='deepcut') # use this method
    #wword = [x.replace('=',' ').replace('-',' ').replace("("," ").replace(")"," ").replace("/"," ").strip(' ') for x in word]
    # Accumulators for each stage of the pipeline.
    th_no_stopwords =[]
    all_no_stopwords =[]
    th_correct_words =[]
    eng_correct_words =[]
    mix_correct_words =[]
    mix1_correct_words =[]
    all_correct_words =[]
    all_correct_words_final =[]
    check_thai_list = []
    # Pass 1: drop Thai stopwords (the list-comprehension also strips
    # falsy entries accumulated so far).
    #for tw in wword:
    for tw in word:
        if tw not in common.thai_stopwords():
            th_no_stopwords = [str for str in th_no_stopwords if str]
            th_no_stopwords.append(tw)
    #print("th_no_stopwords = ", th_no_stopwords)
    # Pass 2: drop English stopwords from what remains.
    for ew in th_no_stopwords:
        if ew not in stopwords.words('english'):
            all_no_stopwords = [str for str in all_no_stopwords if str]
            all_no_stopwords.append(ew)
    #print("all_no_stopwords = ", all_no_stopwords)
    # Pass 3: classify each token (Thai / English / mixed / numeric)
    # and spell-correct it accordingly.
    for c in all_no_stopwords:
        thai = isthai(c)
        number = c.isnumeric()
        if not thai:
            no_num = c.isalpha()
            match1 = re.findall('\D', c) # matches when the string has non-digit (not 0-9) characters
            if no_num:
                # Pure-alpha non-Thai token: correct with the English checker.
                spell = SpellChecker()
                eng_correct = spell.correction(c) #pn
                eng_correct_words.append(eng_correct)
                #print("eng = ", eng_correct)
            elif match1:
                # Mixed letters/digits: keep the token unchanged.
                mix = c
                mix_correct_words.append(mix)
                #print("mix = ", mix)
            else:
                # Pure number: intentionally discarded.
                num = c
                #No return
                #print("num = ", num)
        elif thai:
            # Thai token: correct against TNC word frequencies.
            checker = NorvigSpellChecker(custom_dict=tnc.word_freqs()) #pn
            th_correct = checker.correct(c)
            th_correct_words.append(th_correct)
            #print("thai = ", th_correct)
    # Merge all corrected tokens, scrub punctuation/abbreviation residue,
    # and drop any entries left empty by the scrubbing.
    all_correct_words = th_correct_words + eng_correct_words + mix_correct_words
    all_correct_words = [x.replace('น.','').replace(':',' ').replace('=',' ').replace('–',' ').replace("("," ").replace(")"," ").replace("/"," ").strip(" ") for x in all_correct_words]
    all_correct_words_final = list(filter(None, all_correct_words))
    #print("words = ", all_correct_words_final)
    return all_correct_words_final
    ######## Eng word segment ########
    '''word = text[0]
def test_norvig_spell_checker(self):
    """NorvigSpellChecker accepts tuple-list, word-list, and dict custom_dicts."""
    checker = NorvigSpellChecker(dict_filter=None)
    self.assertGreater(len(checker.dictionary()), 0)
    self.assertGreaterEqual(checker.prob("มี"), 0)

    # (word, freq) pairs: every entry but the last breaks one filter rule.
    freq_pairs = [
        ("การงาน", 31),  # longer than max_len
        ("กาม", 1),      # fewer than min_freq
        ("กาล0", 64),    # has digit
        ("๒๔๗๕", 64),    # has digit
        ("hello", 8),    # not Thai
        ("ลบ", -1),      # negative count
        ("การ", 42),     # OK
    ]
    checker = NorvigSpellChecker(custom_dict=freq_pairs, min_freq=2, max_len=5)
    self.assertEqual(len(checker.dictionary()), 1)

    # A plain word list is accepted verbatim — every word is kept.
    word_list = [
        "เอกราช",
        "ปลอดภัย",
        "เศรษฐกิจ",
        "เสมอภาค",
        "เสรีภาพ",
        "การศึกษา",
    ]
    checker = NorvigSpellChecker(custom_dict=word_list)
    self.assertEqual(len(checker.dictionary()), len(word_list))

    # word -> frequency mapping.
    freq_map = {
        "พหลโยธิน": 1,
        "ขีตตะสังคะ": 2,
        "พนมยงค์": 3,
        "ภมรมนตรี": 4,
        "มิตรภักดี": 5,
        "ลพานุกรม": 6,
        "สิงหเสนี": 7,
    }
    checker = NorvigSpellChecker(custom_dict=freq_map)
    # "พหลโยธิน" will be removed,
    # as it has frequency less than default min_freq (2)
    self.assertEqual(len(checker.dictionary()), len(freq_map) - 1)

    # Non-string entries are rejected outright.
    with self.assertRaises(TypeError):
        checker = NorvigSpellChecker(custom_dict=[24, 6, 2475])
def test_norvig_spell_checker(self):
    """A default checker has a populated dictionary and non-negative probabilities."""
    sp_checker = NorvigSpellChecker(dict_filter=None)
    self.assertGreater(len(sp_checker.dictionary()), 0)
    self.assertGreaterEqual(sp_checker.prob("มี"), 0)