コード例 #1
0
ファイル: views.py プロジェクト: SuphanutN/geo_spellchecker
def spell_checker(request):
    """Render the spell-checker form and, on a valid POST, the top suggestion.

    On POST the submitted ``TextForm`` is validated; when valid, the
    ``InputText`` field is run through a ``NorvigSpellChecker`` built from the
    pickled dictionary on disk and the first suggestion is shown. On any other
    method a blank form is shown. An invalid POST re-renders the bound form
    with an empty result.

    Args:
        request: The incoming Django request.

    Returns:
        The rendered ``spell_checker_form.html`` response, with ``form`` and
        ``text`` (the suggestion, or ``""``) in its context.
    """
    text = ""
    if request.method == 'POST':
        # Bind the form to the submitted data.
        form = TextForm(request.POST)

        if form.is_valid():
            input_text = form.cleaned_data['InputText']

            # NOTE(review): pickle.load can execute arbitrary code; this is
            # only safe as long as the dictionary file is trusted and
            # server-controlled.
            # The context manager closes the file handle even if loading
            # fails (the handle was previously leaked on every request).
            with open("/root/geo_spellchecker/django_server/spell_checker/dataset/dict.pkl", "rb") as dict_file:
                geo_dictlist = pickle.load(dict_file)

            geo_spell_checker = NorvigSpellChecker(custom_dict=geo_dictlist, min_freq=5)
            suggestions = geo_spell_checker.spell(input_text)
            # Fall back to the raw input instead of raising when the checker
            # yields no suggestion at all.
            predict_word = suggestions[0] if suggestions else input_text

            print("Input Text: "+input_text)
            print("Result Text: "+predict_word)

            text = predict_word
    else:
        # GET (or any other method): show a blank form.
        form = TextForm()

    # Every path renders the same template with the same context keys.
    return render(request, 'spell_checker_form.html', {'form': form, 'text': text})
コード例 #2
0
ファイル: __init__.py プロジェクト: xemoe/pythainlp
    def test_spell(self):
        """Check spell(), correct() and a NorvigSpellChecker built with dict_filter=""."""
        # None and the empty string must both come back as "".
        for blank in (None, ""):
            self.assertEqual(spell(blank), "")
        # Misspelled words must produce some (non-None) result.
        for misspelled in ("เน้ร", "เกสมร์"):
            self.assertIsNotNone(spell(misspelled))

        for blank in (None, ""):
            self.assertEqual(correct(blank), "")
        self.assertIsNotNone(correct("ทดสอง"))

        unfiltered_checker = NorvigSpellChecker(dict_filter="")
        self.assertIsNotNone(unfiltered_checker.dictionary())
        self.assertGreaterEqual(unfiltered_checker.prob("มี"), 0)
コード例 #3
0
ファイル: __init__.py プロジェクト: wannaphongcom/pythainlp
    def test_spell(self):
        """Smoke-test spell(), correct() and NorvigSpellChecker(dict_filter="")."""
        blank_inputs = (None, "")

        # spell(): blank input gives "", misspellings give a non-None result.
        for value in blank_inputs:
            self.assertEqual(spell(value), "")
        for word in ("เน้ร", "เกสมร์"):
            self.assertIsNotNone(spell(word))

        # correct(): same contract for blank input.
        for value in blank_inputs:
            self.assertEqual(correct(value), "")
        self.assertIsNotNone(correct("ทดสอง"))

        norvig = NorvigSpellChecker(dict_filter="")
        self.assertIsNotNone(norvig.dictionary())
        self.assertGreaterEqual(norvig.prob("มี"), 0)
コード例 #4
0
    def test_norvig_spell_checker(self):
        """Check the default dictionary and the filtering of a custom (word, freq) list."""
        default_checker = NorvigSpellChecker(dict_filter=None)
        default_entries = default_checker.dictionary()
        self.assertTrue(len(default_entries) > 0)
        self.assertGreaterEqual(default_checker.prob("มี"), 0)

        # Only the last entry is expected to survive min_freq=2 / max_len=5.
        word_freq_pairs = [
            ("การงาน", 31),  # longer than max_len
            ("กาม", 1),  # fewer than min_freq
            ("กาล0", 64),  # has digit
            ("๒๔๗๕", 64),  # has digit
            ("hello", 8),  # not Thai
            ("ลบ", -1),  # negative count
            ("การ", 42),  # OK
        ]
        filtered_checker = NorvigSpellChecker(
            custom_dict=word_freq_pairs, min_freq=2, max_len=5
        )
        self.assertEqual(len(filtered_checker.dictionary()), 1)
コード例 #5
0
 def Tokenize_word(self, text):
     """Tokenize ``text[0]``, drop stopwords and spell-correct what remains.

     Steps:
       1. Remove apostrophes from ``text[0]`` and segment it with
          ``word_tokenize(..., engine='deepcut')``.
       2. Discard tokens found in the Thai or English stopword lists.
       3. Route each remaining token:
          - Thai (per ``isthai``): ``NorvigSpellChecker.correct``;
          - non-Thai and purely alphabetic: ``SpellChecker.correction``;
          - non-Thai containing at least one non-digit character: kept as is;
          - digits only: dropped.
       4. Strip selected punctuation from the results and drop empty strings.

     Args:
         text: Indexable whose first element is the string to process.

     Returns:
         A list of cleaned tokens ordered as Thai corrections, then English
         corrections, then mixed tokens (not in input order).
     """
     sent = text[0].replace("'", "")
     tokens = word_tokenize(sent, engine='deepcut')

     # Load the stopword collections once instead of once per token.
     thai_stop = common.thai_stopwords()
     english_stop = set(stopwords.words('english'))
     # Empty tokens are dropped up front; they would be filtered out of the
     # final result anyway.
     candidates = [
         tok for tok in tokens
         if tok and tok not in thai_stop and tok not in english_stop
     ]

     # Spell checkers are expensive to build, so each one is created lazily
     # on first use and then shared across all tokens.
     thai_checker = None
     english_checker = None

     th_correct_words = []
     eng_correct_words = []
     mix_correct_words = []
     for tok in candidates:
         if isthai(tok):
             if thai_checker is None:
                 thai_checker = NorvigSpellChecker(custom_dict=tnc.word_freqs())
             th_correct_words.append(thai_checker.correct(tok))
         elif tok.isalpha():
             if english_checker is None:
                 english_checker = SpellChecker()
             # correction() can return None when it has no candidate; keep
             # the original token so the cleanup below never sees None.
             eng_correct_words.append(english_checker.correction(tok) or tok)
         elif re.search(r'\D', tok):
             # Contains at least one non-digit character: keep unchanged.
             mix_correct_words.append(tok)
         # Otherwise the token is digits only and is intentionally dropped.

     # 'น.' is removed outright; the listed punctuation becomes a space.
     to_space = str.maketrans({ch: ' ' for ch in ':=–()/'})
     cleaned = [
         word.replace('น.', '').translate(to_space).strip(' ')
         for word in th_correct_words + eng_correct_words + mix_correct_words
     ]
     return [word for word in cleaned if word]
 
     
     ######## Eng word segment ########
     '''word = text[0]
コード例 #6
0
ファイル: test_spell.py プロジェクト: NoerNova/pythainlp
    def test_norvig_spell_checker(self):
        """Build NorvigSpellChecker from several custom_dict shapes and check the results."""
        default_checker = NorvigSpellChecker(dict_filter=None)
        default_entries = default_checker.dictionary()
        self.assertTrue(len(default_entries) > 0)
        self.assertGreaterEqual(default_checker.prob("มี"), 0)

        # List of (word, frequency) tuples: only one entry should survive.
        word_freq_pairs = [
            ("การงาน", 31),  # longer than max_len
            ("กาม", 1),  # fewer than min_freq
            ("กาล0", 64),  # has digit
            ("๒๔๗๕", 64),  # has digit
            ("hello", 8),  # not Thai
            ("ลบ", -1),  # negative count
            ("การ", 42),  # OK
        ]
        pair_checker = NorvigSpellChecker(
            custom_dict=word_freq_pairs,
            min_freq=2,
            max_len=5,
        )
        self.assertEqual(len(pair_checker.dictionary()), 1)

        # Plain list of words: every word is expected to be kept.
        plain_words = [
            "เอกราช",
            "ปลอดภัย",
            "เศรษฐกิจ",
            "เสมอภาค",
            "เสรีภาพ",
            "การศึกษา",
        ]
        word_checker = NorvigSpellChecker(custom_dict=plain_words)
        self.assertEqual(len(word_checker.dictionary()), len(plain_words))

        # Mapping of word -> frequency.
        freq_by_word = {
            "พหลโยธิน": 1,
            "ขีตตะสังคะ": 2,
            "พนมยงค์": 3,
            "ภมรมนตรี": 4,
            "มิตรภักดี": 5,
            "ลพานุกรม": 6,
            "สิงหเสนี": 7,
        }
        mapping_checker = NorvigSpellChecker(custom_dict=freq_by_word)
        # "พหลโยธิน" will be removed,
        # as it has frequency less than default min_freq (2)
        self.assertEqual(
            len(mapping_checker.dictionary()), len(freq_by_word) - 1
        )

        # Non-string entries must be rejected.
        not_words = [24, 6, 2475]
        with self.assertRaises(TypeError):
            NorvigSpellChecker(custom_dict=not_words)
コード例 #7
0
ファイル: test_spell.py プロジェクト: madmuv/pythainlp
 def test_norvig_spell_checker(self):
     checker = NorvigSpellChecker(dict_filter=None)
     self.assertTrue(len(checker.dictionary()) > 0)
     self.assertGreaterEqual(checker.prob("มี"), 0)