def test_en_bug_correct2(self):
    """Regression test for an English spelling-correction bug ('whould')."""
    typo = 'whould'

    # Inspect the speller internals for the misspelled word.
    checker = EnSpell()
    checker.check_init()
    print(checker.word_freq_dict.get(typo))
    print(checker.candidates(typo))
    print(checker.correct_word(typo))

    # Show the result for a whole sentence of misspellings.
    print(en_correct('contend proble poety adress whould niether quaties'))

    # The first element of the en_correct result is the corrected text.
    corrected = en_correct(typo)[0]
    assert corrected == 'would'
def en_correct_t():
    """
    Run the basic self-checks for en_correct, words and en_probability.

    :return: the string 'unit_test pass' when every assertion holds
    """
    # (input word, expected correction) pairs, checked in this order.
    correction_cases = (
        ('spelling', 'spelling'),              # no error
        ('speling', 'spelling'),               # insert
        ('correctud', 'corrected'),            # replace 1
        ('gorrectud', 'corrected'),            # replace 2
        ('bycycle', 'bicycle'),                # replace
        ('inconvient', 'inconvenient'),        # insert 2
        ('arrainged', 'arranged'),             # delete
        ('peotrry', 'poetry'),                 # transpose + delete
        ('word', 'word'),                      # known word
        ('quintessential', 'quintessential'),  # unknown word
    )
    for source_word, expected_word in correction_cases:
        assert en_correct(source_word) == expected_word

    # Segmentation.
    tokens = words('the test is it.')
    assert tokens == ['the', 'test', 'is', 'it']

    # Sanity checks on the word table.
    assert len(spell.WORDS) > 100
    assert spell.WORDS['the'] > 100

    # Sanity checks on word probabilities.
    assert en_probability('word') > 0
    assert en_probability('quintessential') == 0
    the_probability = en_probability('the')
    assert 0.07 < the_probability < 0.08

    return 'unit_test pass'
def spell_t(tests, verbose=False):
    """
    Run en_correct(wrong) on all (right, wrong) pairs and report the result.

    :param tests: sized collection of (right, wrong) word pairs
    :param verbose: if True, print every pair whose correction differs from
        ``right``, together with the ``spell.WORDS`` entries of both words
    :return: None; the summary line is printed to stdout
    """
    import time

    # time.clock() was removed in Python 3.8; perf_counter() is the
    # replacement for measuring elapsed time.
    start = time.perf_counter()
    good, unknown = 0, 0
    n = len(tests)
    for right, wrong in tests:
        w = en_correct(wrong)
        good += (w == right)
        if w != right:
            unknown += (right not in spell.WORDS)
            if verbose:
                print('en_correct({}) => {} ({}); expected {} ({})'.format(
                    wrong, w, spell.WORDS[w], right, spell.WORDS[right]))
    dt = time.perf_counter() - start

    # Guard the ratios so an empty test set reports zeros instead of raising
    # ZeroDivisionError.
    good_ratio = good / n if n else 0.0
    unknown_ratio = unknown / n if n else 0.0
    # Guard the rate against a zero elapsed time on a coarse clock.
    if dt > 0:
        words_per_second = n / dt
    else:
        words_per_second = float('inf') if n else 0.0

    print('{:.0%} of {} correct ({:.0%} unknown) at {:.0f} words per second'.format(
        good_ratio, n, unknown_ratio, words_per_second))
def spell_t(tests, verbose=False):
    """
    Run en_correct(wrong) on all (right, wrong) pairs and report the result.

    The first element of each ``en_correct`` result is compared with ``right``.

    :param tests: sized collection of (right, wrong) word pairs
    :param verbose: if True, print every pair whose correction differs from
        ``right``
    :return: None; the summary line is printed to stdout
    """
    import time

    # time.clock() was removed in Python 3.8; perf_counter() is the
    # replacement for measuring elapsed time.
    start = time.perf_counter()
    good, unknown = 0, 0
    n = len(tests)
    for right, wrong in tests:
        w = en_correct(wrong)[0]
        good += (w == right)
        if w != right:
            unknown += (right not in spell.word_freq_dict)
            if verbose:
                print('en_correct({}) => {}; expected {}'.format(
                    wrong, w, right))
    dt = time.perf_counter() - start

    # Guard the ratios so an empty test set reports zeros instead of raising
    # ZeroDivisionError.
    accuracy = good / n if n else 0.0
    unknown_ratio = unknown / n if n else 0.0
    # Guard the rate against a zero elapsed time on a coarse clock.
    if dt > 0:
        words_per_second = n / dt
    else:
        words_per_second = float('inf') if n else 0.0

    print('acc: {:.0%}, total num: {}, ({:.0%} unknown), speed: {:.0f} '
          'words per second'.format(accuracy, n, unknown_ratio, words_per_second))
def test_en_bug_correct1(self):
    """Regression test for an English spelling-correction bug."""
    # Show the result for a phrase passed through the corrector.
    print(en_correct('folder payroll connectivity website'))

    # A correctly spelled word must come back unchanged.
    result = en_correct('spelling')
    assert result[0] == 'spelling'
def test_en_correct(self):
    """
    Test English spelling correction.

    :return: the string 'unit_test pass' when every assertion holds
    """
    sentence = 'spelling speling correctud gorrectud bycycle inconvient arrainged peotrry word quintessential'
    print(en_correct(sentence))
    print(en_correct('spelling')[0])

    # (input word, expected correction) pairs, checked in this order.
    correction_cases = (
        ('spelling', 'spelling'),              # no error
        ('speling', 'spelling'),               # insert
        ('correctud', 'corrected'),            # replace 1
        ('gorrectud', 'corrected'),            # replace 2
        ('bycycle', 'bicycle'),                # replace
        ('inconvient', 'inconvenient'),        # insert 2
        ('arrainged', 'arranged'),             # delete
        ('peotrry', 'poetry'),                 # transpose + delete
        ('word', 'word'),                      # known word
        ('quintessential', 'quintessential'),  # unknown word
    )
    for source_word, expected_word in correction_cases:
        # The first element of the en_correct result is the corrected text.
        assert en_correct(source_word)[0] == expected_word

    # Sanity checks on the frequency dictionary.
    freq_table_size = len(spell.word_freq_dict)
    assert freq_table_size > 100
    assert spell.word_freq_dict['the'] > 100

    # Sanity checks on word probabilities.
    assert en_probability('word') > 0
    assert en_probability('quintessentialkk') == 0
    print(en_probability('the'))

    return 'unit_test pass'