コード例 #1
0
class InteractiveSpellchecker(object):
    """Interactive "en_US" spell checker that collects corrected tokens.

    Corrected, lower-cased tokens are accumulated in ``self.result`` by
    :meth:`do_check`; :meth:`process_text` empties that list again.
    """

    def __init__(self):
        self.result = []
        self.checker = enchant.checker.SpellChecker("en_US")
        self.cmdline_checker = CmdLineChecker()
        self.cmdline_checker.set_checker(self.checker)

    def process_text(self, text):
        """
        accepts: [String] text input
        returns: [List] tokens produced by the "en_US" tokenizer (with
                 URLFilter applied) from the words of `text` that contain
                 neither '@' nor 'RT'; None if a UnicodeEncodeError is
                 raised along the way

        Side effect: empties ``self.result`` in place.
        """
        try:
            # Clear in place so existing references to the list stay valid.
            self.result[:] = []
            kept = [token for (token, _) in basic_tokenize(text)
                    if not ('@' in token or 'RT' in token)]
            tokenize = get_tokenizer("en_US", filters=[URLFilter])
            return [token for (token, _) in tokenize(' '.join(kept))]
        except UnicodeEncodeError:
            # Best-effort: the caller receives None for such input.
            return None

    def do_check(self, word):
        """Check `word` interactively and record the lower-cased result.

        The checker's text after the interactive session is lower-cased;
        unless it contains '#', its whitespace-separated pieces are
        appended to ``self.result``.
        """
        self.checker.set_text(word)
        self.cmdline_checker.run()
        corrected = self.checker.get_text().lower()
        if '#' in corrected:
            return
        self.result.extend(corrected.split())
コード例 #2
0
def do_check(checker, to_check):
    """Interactively spell check every string in `to_check`, in place.

    Each entry is loaded into `checker`, an interactive command-line
    session is run on it, and the entry is replaced by the checker's
    resulting text.

    Args:
        checker: object providing ``set_text``/``get_text`` that
            ``CmdLineChecker.set_checker`` accepts.
        to_check: mutable list of strings; modified in place.

    Returns:
        None.
    """
    # One command-line front end is enough; it is bound to `checker` once
    # and re-run for every entry.
    cmdline_checker = CmdLineChecker()
    cmdline_checker.set_checker(checker)
    # Write back by position: looking the entry up with list.index() hits
    # the wrong slot when an earlier correction equals a later entry, and
    # costs a linear scan per item.
    for position, text in enumerate(to_check):
        checker.set_text(text)
        cmdline_checker.run()
        to_check[position] = checker.get_text()
コード例 #3
0
def spell_check(text):
    """Run `text` through an interactive command-line spell check.

    Uses the module-level ``chkr`` checker and returns the text it holds
    once the command-line session has finished.
    """
    session = CmdLineChecker()
    chkr.set_text(text)
    session.set_checker(chkr)
    session.run()
    corrected = chkr.get_text()
    return corrected
コード例 #4
0
class CmdLineSpellChecker(object):
    """Couples a ``_SpellChecker`` with a ``CmdLineChecker`` front end."""

    def __init__(self, language, pwl=None):
        """Build the checker and attach the command-line front end.

        `language` is handed to ``_SpellChecker`` as ``lang``; when `pwl`
        is truthy, ``enchant.DictWithPWL(language, pwl)`` is handed over
        instead.
        """
        lang = enchant.DictWithPWL(language, pwl) if pwl else language
        self._checker = _SpellChecker(lang=lang, filters=filters_to_use)
        frontend = CmdLineChecker()
        frontend.set_checker(self._checker)
        self.cmdln = frontend

    def check(self, text):
        """Check `text` interactively and return the checker's final text."""
        checker = self._checker
        checker.set_text(text)
        self.cmdln.run()
        return checker.get_text()
コード例 #5
0
class CmdLineSpellChecker(object):
    """Interactive spell checker driven through ``CmdLineChecker``."""

    def __init__(self, language, pwl=None):
        """Create the underlying checker and its command-line driver.

        A truthy `pwl` makes the checker use
        ``enchant.DictWithPWL(language, pwl)`` in place of the plain
        `language` value.
        """
        if pwl:
            lang_arg = enchant.DictWithPWL(language, pwl)
        else:
            lang_arg = language
        checker = _SpellChecker(lang=lang_arg, filters=filters_to_use)
        self._checker = checker
        self.cmdln = CmdLineChecker()
        self.cmdln.set_checker(checker)

    def check(self, text):
        """Load `text`, run the interactive session, return the result."""
        self._checker.set_text(text)
        self.cmdln.run()
        result = self._checker.get_text()
        return result
コード例 #6
0
 def __init__(self, language, pwl=None):
     """Set up the spell checker and its command-line front end.

     `language` is passed to ``_SpellChecker`` as ``lang``; a truthy
     `pwl` replaces it with ``enchant.DictWithPWL(language, pwl)``.
     """
     lang = enchant.DictWithPWL(language, pwl) if pwl else language
     self._checker = _SpellChecker(lang=lang, filters=filters_to_use)
     frontend = CmdLineChecker()
     frontend.set_checker(self._checker)
     self.cmdln = frontend
コード例 #7
0
ファイル: spellchecker.py プロジェクト: douglase/ipythontools
def jupyterspellchecker():
    """Interactively spell check a notebook and write the corrected copy.

    Reads the input and output paths from the command line, loads the
    input notebook as JSON, runs every markdown, raw and heading cell
    through an interactive "en_US" spell check (with LatexCommandFilter
    applied), writes the notebook to the output path and exits via
    ``sys.exit()``.

    Raises:
        KeyError: if the notebook has neither a top-level ``cells`` nor a
            ``worksheets`` entry.
    """
    parser = argparse.ArgumentParser(description='''Spell check a Jupyter/IPython notebook to a LaTeX file.

Raw cells and markdown cells are spell checked in American English.
''')
    parser.add_argument('infile', help='path and filename of the input notebook file.')
    parser.add_argument('outfile', help='path and filename of the output file.')
    args = parser.parse_args()

    chkr = enchant.checker.SpellChecker("en_US", filters=[LatexCommandFilter])
    cmdln = CmdLineChecker()
    cmdln.set_checker(chkr)

    def _corrected(text):
        # Run one interactive session on `text` and return the outcome.
        chkr.set_text(text)
        cmdln.run()
        return chkr.get_text()

    with open(args.infile, 'r') as f:
        print('Parsing ', args.infile)
        ipynb = json.load(f)

    if 'cells' in ipynb:
        # newer versions of notebook keep the cells at the top level
        cells = ipynb['cells']
    else:
        # older notebook formats group cells into worksheets; visit all
        # of them, not only the first
        cells = [cell
                 for worksheet in ipynb['worksheets']
                 for cell in worksheet['cells']]

    for cell in cells:
        if cell['cell_type'] not in ('markdown', 'raw', 'heading'):
            continue
        source = cell['source']
        if isinstance(source, list):
            for i, line in enumerate(source):
                source[i] = _corrected(line)
        else:
            # `source` may also be one plain string; item assignment on
            # it would raise TypeError, so replace the whole value.
            cell['source'] = _corrected(source)

    with open(args.outfile, 'w') as f:
        print('Writing ', args.outfile)
        json.dump(ipynb, f)
    sys.exit()
コード例 #8
0
def jupyterspellchecker():
    """Spell check the text cells of a notebook from the command line.

    Parses ``infile`` and ``outfile`` from the command line, loads the
    input notebook as JSON, passes the source of every markdown, raw and
    heading cell through an interactive "en_US" spell check (with
    LatexCommandFilter applied), dumps the notebook to ``outfile`` and
    then calls ``sys.exit()``.

    Raises:
        KeyError: if the notebook has neither a top-level ``cells`` nor a
            ``worksheets`` entry.
    """
    parser = argparse.ArgumentParser(
        description='''Spell check a Jupyter/IPython notebook to a LaTeX file.

Raw cells and markdown cells are spell checked in American English.
''')
    parser.add_argument('infile',
                        help='path and filename of the input notebook file.')
    parser.add_argument('outfile',
                        help='path and filename of the output file.')
    args = parser.parse_args()

    chkr = enchant.checker.SpellChecker("en_US", filters=[LatexCommandFilter])
    cmdln = CmdLineChecker()
    cmdln.set_checker(chkr)

    def _spellcheck(text):
        # One interactive pass over `text`; returns the checker's text.
        chkr.set_text(text)
        cmdln.run()
        return chkr.get_text()

    with open(args.infile, 'r') as f:
        print('Parsing ', args.infile)
        ipynb = json.load(f)

    if 'cells' in ipynb:
        # newer versions of notebook: cells live at the top level
        cells = ipynb['cells']
    else:
        # older notebook formats: cells are grouped into worksheets, and
        # every worksheet is processed rather than just the first one
        cells = []
        for worksheet in ipynb['worksheets']:
            cells.extend(worksheet['cells'])

    for cell in cells:
        if cell['cell_type'] in ('markdown', 'raw', 'heading'):
            source = cell['source']
            if isinstance(source, list):
                for i, line in enumerate(source):
                    source[i] = _spellcheck(line)
            else:
                # A single-string source cannot be updated item by item
                # (TypeError), so the whole value is replaced instead.
                cell['source'] = _spellcheck(source)

    with open(args.outfile, 'w') as f:
        print('Writing ', args.outfile)
        json.dump(ipynb, f)
    sys.exit()
コード例 #9
0
 def __init__(self, language, pwl=None):
     """Create the checker and bind a command-line front end to it.

     When `pwl` is truthy the checker is built on
     ``enchant.DictWithPWL(language, pwl)``; otherwise `language` is
     passed through unchanged as ``lang``.
     """
     if pwl:
         lang_arg = enchant.DictWithPWL(language, pwl)
     else:
         lang_arg = language
     checker = _SpellChecker(lang=lang_arg, filters=filters_to_use)
     self._checker = checker
     self.cmdln = CmdLineChecker()
     self.cmdln.set_checker(checker)
コード例 #10
0
 def __init__(self):
     """Create an "en_US" checker, its command-line front end, and an
     empty ``result`` list."""
     self.result = []
     checker = enchant.checker.SpellChecker("en_US")
     frontend = CmdLineChecker()
     frontend.set_checker(checker)
     self.checker = checker
     self.cmdline_checker = frontend
コード例 #11
0
just in case something gets screwed up.
'''
import re
import json
import sys

import enchant
import enchant.tokenize
import enchant.checker
from enchant.checker.CmdLineChecker import CmdLineChecker

class LatexCommandFilter(enchant.tokenize.EmailFilter):
    """Tokenizer filter keyed on LaTeX-style commands.

    Only ``_pattern`` is overridden; everything else is inherited from
    ``enchant.tokenize.EmailFilter``.
    NOTE(review): presumably the inherited logic skips tokens that match
    ``_pattern`` -- confirm against the PyEnchant source.
    """
    # A backslash followed by either one non-letter character or a run of
    # ASCII letters (e.g. ``\%`` or ``\alpha``).
    _pattern = re.compile(r"\\([^a-zA-Z]|[a-zA-Z]+)")

# Interactive "en_US" checker with LatexCommandFilter applied, driven by a
# command-line front end.
chkr = enchant.checker.SpellChecker("en_US", filters=[LatexCommandFilter])
cmdln = CmdLineChecker()
cmdln.set_checker(chkr)


# First command-line argument: path of the notebook (JSON) to read.
with open(sys.argv[1], 'r') as f:
    print 'Parsing ', sys.argv[1]
    ipynb = json.load(f)

# Only the cells of the first worksheet are visited.
for cell in ipynb['worksheets'][0]['cells']:
    if cell['cell_type'] in ['markdown', 'raw', 'heading']:
        # assumes cell['source'] is a list of lines; item assignment below
        # would fail on a plain string -- TODO confirm
        for i, line in enumerate(cell['source']):
            chkr.set_text(line)
            cmdln.run()
            # Store the checker's text back in place of the original line.
            cell['source'][i] = chkr.get_text()

with open(sys.argv[2], 'w') as f:
コード例 #12
0
# Pretty simple command line spellchecker for NWN dialogs...
# Note this requires PyEnchant, and its command line is
# somewhat wonky.  Type 'h' at the command prompt to get options
# for correcting words.

import enchant
import enchant.checker
from enchant.checker.CmdLineChecker import CmdLineChecker

from pynwn.module import Module

if __name__ == '__main__':
    # Using US english dictionary.
    chkr = enchant.checker.SpellChecker('en_US')
    cmdln = CmdLineChecker()
    cmdln.set_checker(chkr)

    mod = Module('test.mod')

    # Walk every dialog resource in the module and interactively spell
    # check the text of each of its entries.
    for dlg in mod.glob('*.dlg'):
        print(dlg.resref)
        for n in dlg.entries:
            # Fetch the text once instead of once per use.
            text = n.get_text(0)
            if text is None or len(text) == 0:
                # Nothing to check for this entry.
                continue

            # Parenthesised single-argument print behaves the same on
            # Python 2 and 3 and matches the print(dlg.resref) call above.
            print(text)
            chkr.set_text(text)
            cmdln.run()
            # Store the checker's text back on the entry.
            n.set_text(0, chkr.get_text())