Example #1
0
    def test_en_bug_correct2(self):
        """Test an English correction bug: 'whould' must be corrected to 'would'."""
        checker = EnSpell()
        checker.check_init()
        misspelled = 'whould'
        # Print the intermediate lookups so a failing run can be diagnosed
        # from the captured output.
        print(checker.word_freq_dict.get(misspelled))
        print(checker.candidates(misspelled))
        print(checker.correct_word(misspelled))

        print(en_correct('contend proble poety adress whould niether  quaties'))
        # The first element of the en_correct result is compared with the
        # expected spelling.
        assert en_correct(misspelled)[0] == 'would'
Example #2
0
def en_correct_t():
    """
    Run the English spelling-correction self-checks.

    :return: the string 'unit_test pass' when every assertion holds
    """
    # (input word, expected en_correct result) pairs, checked in order.
    correction_cases = (
        ('spelling', 'spelling'),  # no error
        ('speling', 'spelling'),  # insert
        ('correctud', 'corrected'),  # replace 1
        ('gorrectud', 'corrected'),  # replace 2
        ('bycycle', 'bicycle'),  # replace
        ('inconvient', 'inconvenient'),  # insert 2
        ('arrainged', 'arranged'),  # delete
        ('peotrry', 'poetry'),  # transpose + delete
        ('word', 'word'),  # known word
        ('quintessential', 'quintessential'),  # unknown word
    )
    for given, expected in correction_cases:
        assert en_correct(given) == expected

    # Segmentation of a sentence into words.
    assert words('the test is it.') == ['the', 'test', 'is', 'it']

    # Sanity checks on the frequency table and the probabilities.
    assert len(spell.WORDS) > 100
    assert spell.WORDS['the'] > 100
    assert en_probability('word') > 0
    assert en_probability('quintessential') == 0
    assert 0.07 < en_probability('the') < 0.08
    return 'unit_test pass'
Example #3
0
def spell_t(tests, verbose=False):
    """
    Run en_correct(wrong) on all (right, wrong) pairs and print a summary.

    :param tests: sized collection of (right, wrong) word pairs
    :param verbose: when True, print each miscorrected pair together with the
        ``spell.WORDS`` entries of the produced and the expected word
    :return: None; the report is printed to stdout
    """
    import time
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # documented replacement for measuring elapsed time.
    start = time.perf_counter()
    good, unknown = 0, 0
    n = len(tests)
    for right, wrong in tests:
        w = en_correct(wrong)
        good += (w == right)
        if w != right:
            # Count failures whose expected word is missing from spell.WORDS.
            unknown += (right not in spell.WORDS)
            if verbose:
                print('en_correct({}) => {} ({}); expected {} ({})'.format(wrong, w, spell.WORDS[w], right,
                                                                           spell.WORDS[right]))
    dt = time.perf_counter() - start
    # Guard the ratios: an empty test set or a zero elapsed time must not
    # raise ZeroDivisionError.
    accuracy = good / n if n else 0.0
    unknown_rate = unknown / n if n else 0.0
    if dt > 0:
        speed = n / dt
    else:
        speed = float('inf') if n else 0.0
    print('{:.0%} of {} correct ({:.0%} unknown) at {:.0f} words per second'.format(accuracy, n, unknown_rate, speed))
Example #4
0
def spell_t(tests, verbose=False):
    """
    Run en_correct(wrong) on all (right, wrong) pairs and print a summary.

    The first element of each ``en_correct`` result is compared with the
    expected word.

    :param tests: sized collection of (right, wrong) word pairs
    :param verbose: when True, print each miscorrected pair
    :return: None; the report is printed to stdout
    """
    import time
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # documented replacement for measuring elapsed time.
    start = time.perf_counter()
    good, unknown = 0, 0
    n = len(tests)
    for right, wrong in tests:
        w = en_correct(wrong)[0]
        good += (w == right)
        if w != right:
            # Count failures whose expected word is missing from the
            # frequency dictionary.
            unknown += (right not in spell.word_freq_dict)
            if verbose:
                print('en_correct({}) => {}; expected {}'.format(
                    wrong, w, right))
    dt = time.perf_counter() - start
    # Guard the ratios: an empty test set or a zero elapsed time must not
    # raise ZeroDivisionError.
    accuracy = good / n if n else 0.0
    unknown_rate = unknown / n if n else 0.0
    if dt > 0:
        speed = n / dt
    else:
        speed = float('inf') if n else 0.0
    print('acc: {:.0%}, total num: {}, ({:.0%} unknown), speed: {:.0f} '
          'words per second'.format(accuracy, n, unknown_rate, speed))
Example #5
0
 def test_en_bug_correct1(self):
     """Test an English correction bug: print a multi-word result and check a correct word."""
     print(en_correct('folder payroll connectivity website'))
     # An already-correct word must come back unchanged as the first element.
     spelling_result = en_correct('spelling')
     assert spelling_result[0] == 'spelling'
Example #6
0
    def test_en_correct(self):
        """
        Test English spelling correction.

        :return: the string 'unit_test pass' when every assertion holds
        """
        sentence = 'spelling speling correctud gorrectud bycycle inconvient arrainged peotrry word quintessential'
        print(en_correct(sentence))
        print(en_correct('spelling')[0])

        # (input word, expected first element of the en_correct result),
        # checked in order.
        correction_cases = (
            ('spelling', 'spelling'),  # no error
            ('speling', 'spelling'),  # insert
            ('correctud', 'corrected'),  # replace 1
            ('gorrectud', 'corrected'),  # replace 2
            ('bycycle', 'bicycle'),  # replace
            ('inconvient', 'inconvenient'),  # insert 2
            ('arrainged', 'arranged'),  # delete
            ('peotrry', 'poetry'),  # transpose + delete
            ('word', 'word'),  # known word
            ('quintessential', 'quintessential'),  # unknown word
        )
        for given, expected in correction_cases:
            assert en_correct(given)[0] == expected

        # Sanity checks on the frequency dictionary and the probabilities.
        assert len(spell.word_freq_dict) > 100
        assert spell.word_freq_dict['the'] > 100
        assert en_probability('word') > 0
        assert en_probability('quintessentialkk') == 0
        print(en_probability('the'))
        return 'unit_test pass'