예제 #1
0
 def test_word_in(self):
     """Check membership lookups and one frequency in the default word dict."""
     spell = EnSpell()
     spell.check_init()
     # assertIn/assertNotIn report the offending key and container on failure,
     # unlike assertTrue/assertFalse applied to a pre-evaluated boolean.
     self.assertIn('key', spell.word_freq_dict)
     self.assertNotIn('wantthis', spell.word_freq_dict)  # a known excluded word
     # Hard-coded count for the dictionary a default EnSpell() ends up with.
     self.assertEqual(spell.word_freq_dict.get('a', 0), 48779620)
예제 #2
0
    def test_word_known(self):
        """Verify that `known` keeps recognised words and drops the rest."""
        checker = EnSpell()

        # Each recognised word should come back as a singleton set.
        for word in ('this', 'hi', 'holmes', 'known'):
            self.assertEqual(checker.known([word]), {word})

        # Unrecognised tokens should produce an empty set.
        for word in ('-', 'foobar', 'ths', 'ergos'):
            self.assertEqual(checker.known([word]), set())
예제 #3
0
    def test_case_insensitive_parse_words(self):
        """Build a checker from parsed text and check a custom word's count.

        'thisss' is expected to have frequency 0 in the default dictionary
        and frequency 1 in the dictionary parsed from the sample sentence.
        """
        spell_old = EnSpell()
        spell_old.check_init()
        print(spell_old.word_freq_dict.get('thisss', 0))
        # Use the TestCase assertion instead of a bare `assert`, which is
        # stripped under `python -O` and prints no values on failure.
        self.assertEqual(spell_old.word_freq_dict.get('thisss', 0), 0)

        dic = get_word_freq_dict_from_text("thisss is a Test of the test!")
        print(dic)
        spell_new = EnSpell(word_freq_dict=dic)
        print(spell_new.word_freq_dict.get('thisss', 0))
        # The word is looked up in its lower-case form.
        self.assertEqual(spell_new.word_freq_dict.get('thisss', 0), 1)
예제 #4
0
 def test_candidates(self):
     """Check candidate sets for a misspelling, known words and nonsense."""
     spell = EnSpell()
     spell.check_init()
     print(spell.word_freq_dict.get('ths'), spell.candidates('ths'))
     # A misspelling should yield at least one suggestion; assertGreater
     # shows the actual length on failure instead of "False != True".
     self.assertGreater(len(spell.candidates('ths')), 0)
     self.assertEqual(spell.candidates('the'), {'the'})
     self.assertEqual(spell.candidates('hi'), {'hi'})
     # something that cannot exist... should return just the same element...
     self.assertEqual(''.join(spell.candidates('manasaeds')), 'manasaeds')
예제 #5
0
 def test_correct(self):
     """Check `correct` on a typo, valid words, punctuation and numbers.

     Only element 0 of each result is compared against the expected text.
     """
     spell = EnSpell()
     self.assertEqual(spell.correct('ths')[0], 'the')
     self.assertEqual(spell.correct('ergo')[0], 'ergo')
     # self.assertEqual(spell.correct('alot'), 'a lot')
     self.assertEqual(spell.correct('this')[0], 'this')
     self.assertEqual(spell.correct('-')[0], '-')
     self.assertEqual(spell.correct('1213')[0], '1213')
     # Index the result like every other case here. Joining the whole return
     # value breaks as soon as it holds a non-string item.
     # NOTE(review): assumes correct() returns (text, details) -- confirm.
     self.assertEqual(spell.correct('1213.9')[0], '1213.9')
예제 #6
0
    def test_en_bug_correct2(self):
        """Regression test for an English spelling-correction bug."""
        checker = EnSpell()
        checker.check_init()
        print(checker.word_freq_dict.get('whould'))
        print(checker.candidates('whould'))

        word_result = checker.correct_word('whould')
        print(word_result)
        sentence_result = en_correct('contend proble poety adress whould niether  quaties')
        print(sentence_result)
        # Element 0 of the result is compared with the expected spelling.
        corrected_text = en_correct('whould')[0]
        assert corrected_text == 'would'  # no error
예제 #7
0
 def test_word_frequency(self):
     """Check the stored frequency of the word 'he'."""
     checker = EnSpell()
     checker.check_init()
     # Hard-coded count: it changes whenever the default load changes.
     he_count = checker.word_freq_dict.get('he')
     self.assertEqual(he_count, 12846723)
예제 #8
0
    print(details)
    print()

    # 2. 演示英文句子列表纠错
    sent_lst = ['what hapenning?','how to speling it', 'gorrect', 'i know']
    for sent in sent_lst:
        corrected_text, details = pycorrector.en_correct(sent)
        if details:
            print('[error] ', sent, '=>', corrected_text, details)
    print()

    # 3. 演示自定义英文词典
    from pycorrector.en_spell import EnSpell

    sent = "what is your name? shylock?"
    spell = EnSpell()
    corrected_text, details = spell.correct(sent)
    print(sent, '=>', corrected_text, details)
    print('-' * 42)
    my_dict = {'your': 120, 'name': 2, 'is': 1, 'shylock': 1, 'what': 1}  # word, freq
    spell = EnSpell(word_freq_dict=my_dict)
    corrected_text, details = spell.correct(sent)
    print(sent, '=>', corrected_text, details)
    print()

    # 4. 演示自定义纠错集
    from pycorrector.en_spell import EnSpell

    spell = EnSpell()
    sent = "what happt ? who is shylock."
    corrected_text, details = spell.correct(sent)