Example #1
0
def fix_spells():
    """ Walk the document looking for misspelled words and try to
    find corrected versions of them.

    Builds an aspell-backed checker for the configured language and asks
    a SpellcheckDocMaker to produce the word-fix document for
    'text/clean'.
    """
    language = get_lang()
    doc_maker = document_builder.SpellcheckDocMaker(
        spell_checker.AspellSpellChecker(language))
    doc_maker.make_word_fix_doc('text/clean')
Example #2
0
def simple_clean():
    """ Minimal cleanup pass, used for testing the algorithm.

    Copies the raw text into text/simple_clean through the shell, strips
    possible headers there, then loads the pages into a LineManager,
    applies quick_fix() and join_lines(), and writes the pages back.
    """
    work_dir = 'text/simple_clean'

    # Start from a fresh copy of the raw pages.
    os.system('cp text/raw/* text/simple_clean/')

    language = get_lang()
    aspell_checker = spell_checker.AspellSpellChecker(language)
    # Disabled alternative fixer:
    #   aspell_checker.fixer = spell_checker.SimpleEnglishSpellFixer()
    doc_maker = document_builder.SpellcheckDocMaker(aspell_checker)
    doc_maker.remove_possible_headers(work_dir)

    # The line manager uses its own file-configured checker rather than
    # the aspell one built above.
    file_checker = spell_checker.FileConfiguredSpellChecker(language)
    lines = line_manager.LineManager(file_checker)
    lines.load(work_dir)
    lines.quick_fix()
    lines.join_lines()
    # Disabled alternative output steps:
    #   lines.write_pages('text/simple_clean', False)
    #   doc_maker.make_word_fix_doc('text/simple_clean')
    lines.write_pages(work_dir, True)
Example #3
0
 def test_checkables(self):
     """ Check spell_checker.joinables() against a table of word pairs.

     Each case is (word1, word2, expected), where expected is the exact
     list that joinables(word1, word2) must return.
     """
     to_test = (
         # Plain pair: two candidates expected.
         ('bad', 'company', [
             'badcompany',
             'bacompany',
         ]),
         # First word ending in a hyphen or an apostrophe: a single
         # candidate expected.
         ('bad-', 'company', [
             'badcompany',
         ]),
         ("bad'", 'company', [
             'badcompany',
         ]),
         # Capitalised second word: no candidates expected.
         ('bad', 'Company', []),
         # Non-ASCII characters on either side of the join.
         (u'ba\u00E0', 'company', [
             u'ba\u00E0company',
             'bacompany',
         ]),
         ('bad', u'\u00E0ompany', [
             u'bad\u00E0ompany',
             u'ba\u00E0ompany',
         ]),
         # Short words: 'ba' + 'company' and 'bad' + 'co' expect nothing,
         # while 'bad' + 'com' still expects two candidates.
         (
             'ba',
             'company',
             [],
         ),
         (
             'bad',
             'com',
             [
                 'badcom',
                 'bacom',
             ],
         ),
         (
             'bad',
             'co',
             [],
         ),
     )
     # joinables is a module-level function, so no SpellcheckDocMaker
     # instance is needed here.
     for word1, word2, expected in to_test:
         self.assertEqual(spell_checker.joinables(word1, word2), expected)
Example #4
0
 def test_fixed_words(self):
     """ Check SpellcheckDocMaker.fixed_words() against fixture pairs.

     A stub checker is seeded with a fixed word list. For every
     (test, expected) pair yielded by test_expected() for the
     fix_spelling fixture, the first value of fixed_words((test, ))
     must equal [expected].
     """
     sc = spell_checker.StubSpellChecker([
         'Cantrip',
         'government',
         'bomb',
         'born',
         'bod',
         "he'll",
         'What',
         'hiss',
         'different',
     ])
     db = document_builder.SpellcheckDocMaker(sc)
     fixture = '{}/test_spellcheck/fix_spelling'.format(PATH)
     for test, expected in test_expected(fixture):
         fixes = db.fixed_words((test, ))
         # Materialise the values before indexing: on Python 3 a dict's
         # values() is a view and does not support [0]; on Python 2 this
         # is equivalent to the direct index.
         self.assertEqual(list(fixes.values())[0], [expected])
Example #5
0
def remove_headers():
    """ Strip possible headers from the pages under 'text/clean'.

    The aspell checker is built with the configured language and the
    per-language word list './dict.<lang>.pws'.
    """
    language = get_lang()
    wordlist_path = './dict.{}.pws'.format(language)
    doc_maker = document_builder.SpellcheckDocMaker(
        spell_checker.AspellSpellChecker(language, wordlist_path))
    doc_maker.remove_possible_headers('text/clean')
Example #6
0
def cross_line_fixes():
    """ Produce the line-join document for the pages under 'text/clean'.

    Uses an aspell checker for the configured language.
    """
    language = get_lang()
    doc_maker = document_builder.SpellcheckDocMaker(
        spell_checker.AspellSpellChecker(language))
    doc_maker.make_line_join_doc('text/clean')
Example #7
0
def proper_names():
    """ Produce the possible-proper-name document for 'text/clean'.

    The aspell checker is built with the configured language and the
    per-language word list './dict.<lang>.pws'.
    """
    language = get_lang()
    wordlist_path = './dict.{}.pws'.format(language)
    doc_maker = document_builder.SpellcheckDocMaker(
        spell_checker.AspellSpellChecker(language, wordlist_path))
    doc_maker.make_possible_proper_name_doc('text/clean')
Example #8
0
def possible_headers():
    """ Run the possible-headers pass over the pages under 'text/raw'.

    Uses an aspell checker for the configured language; whatever
    SpellcheckDocMaker.possible_headers returns is discarded.
    """
    language = get_lang()
    doc_maker = document_builder.SpellcheckDocMaker(
        spell_checker.AspellSpellChecker(language))
    doc_maker.possible_headers('text/raw')
Example #9
0
def page_info():
    """ Run page_image_info for 'text/raw' together with 'images/pages'.

    Uses an aspell checker for the configured language.
    """
    language = get_lang()
    doc_maker = document_builder.SpellcheckDocMaker(
        spell_checker.AspellSpellChecker(language))
    doc_maker.page_image_info('text/raw', 'images/pages')