Example #1
    def initSpellchecker(self):
        # TODO: disable the spellchecker icon if enchant is not working
        try:
            import enchant
            spellDictDir = settings.get('spellchecker:directory')
            if spellDictDir:
                if (enchant.__ver_major__, enchant.__ver_minor__) >= (1, 6):
                    enchant.set_param("enchant.myspell.dictionary.path",
                                      spellDictDir)
                else:
                    print("Your pyenchant version is too old. Please "
                          "upgrade to version 1.6.0 or higher if you want "
                          "to use the spellchecker.")
                    return None

            spellLang = settings.get('spellchecker:lang')
            if spellLang in enchant.list_languages():
                # enchant.dict_exists(spellLang) does not work for me on linux...
                self.dict = enchant.Dict(spellLang)
            else:
                # try dictionary based on the current locale
                try:
                    self.dict = enchant.Dict()
                    settings.set('spellchecker:lang', self.dict.tag)
                except Exception:  # we don't have a working dictionary...
                    return None
            if self.dict:
                self.usePWL(self.dict)

        except Exception:
            print("Cannot start spellchecker!")
            import traceback
            traceback.print_exc()
            return None
Example #2

import os
from shutil import copyfile

import enchant


def aspell(string):
    '''
    if the word is OK:       returns (False, [])
    if it is a misspelling:  returns (True, aspell_sug)
    '''
    enchant.set_param("enchant.aspell.dictionary.path",
                      "./aspell6-ru-0.99f7-1")
    if not enchant.dict_exists('ru_RU'):
        copyfile(
            './resources/aspell/ru_RU.dic',
            os.path.dirname(enchant.__file__).replace('\\', '/') +
            '/share/enchant/myspell/ru_RU.dic')
        copyfile(
            './resources/aspell/ru_RU.aff',
            os.path.dirname(enchant.__file__).replace('\\', '/') +
            '/share/enchant/myspell/ru_RU.aff')
    d = enchant.Dict("ru_RU")
    if d.check(string):  # aspell says OK
        # print('aspell says OK')
        return False, []
    else:
        # print('aspell says mistake and suggests:')
        aspell_sug = d.suggest(string)
        return True, aspell_sug
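
A short usage sketch of the helper above (the words and suggestion lists are illustrative):

mistake, suggestions = aspell("превет")  # misspelling -> (True, ['привет', ...])
mistake, suggestions = aspell("привет")  # correct word -> (False, [])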
Example #3
    def initSpellchecker(self):
        # TODO: disable the spellchecker icon if enchant is not working
        try:
            import enchant
            spellDictDir = settings.get('spellchecker:directory')
            if spellDictDir:
                if (enchant.__ver_major__, enchant.__ver_minor__) >= (1, 6):
                    enchant.set_param("enchant.myspell.dictionary.path",
                                      spellDictDir)
                else:
                    print("Your pyenchant version is too old. Please "
                          "upgrade to version 1.6.0 or higher if you want "
                          "to use the spellchecker.")
                    return None

            spellLang = settings.get('spellchecker:lang')
            if enchant.dict_exists(spellLang):
                self.dict = enchant.Dict(spellLang)
            else:
                # try dictionary based on the current locale
                try:
                    self.dict = enchant.Dict()
                    settings.set('spellchecker:lang', self.dict.tag)
                except Exception:  # we don't have a working dictionary...
                    return None
            if self.dict:
                self.usePWL(self.dict)

        except Exception:
            print("Cannot start spellchecker!")
            import traceback
            traceback.print_exc()
            return None
Example #4
def get_spellchecker_languages(directory=None):
    """
    Check if the spellchecker is installed and provide a sorted list of languages
    """
    try:
        import enchant

        if directory:
            enchant.set_param("enchant.myspell.dictionary.path", directory)
        langs = enchant.list_languages()
        return sorted(langs)

    except Exception:
        print("Cannot start spellchecker!")
        import traceback
        traceback.print_exc()
        return None
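
A minimal usage sketch (the dictionary path is a hypothetical example):

langs = get_spellchecker_languages("/usr/share/myspell/dicts")
if langs is None:
    print("Spellchecker is not available.")
else:
    print(langs)  # e.g. ['cy_GB', 'en_GB', 'en_US']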
Example #6
#!/usr/bin/env python
# Purpose: find words in the word puzzle game "Alpha Omega"
# Syntax: ./findword.py


from itertools import permutations
import enchant
enchant.set_param("enchant.myspell.dictionary.path", "/opt/local/var/macports/software/aspell")

# print(enchant.list_dicts())
d = enchant.Dict("en_US")

# type in a series of letters
print "Provide the input letters, please!"
try:
    letters = raw_input()
    print "The input letters are: ", letters
except ValueError:
    print "Not a series of letters"

# find all permutations of the letters and check if they are a word
perms = {''.join(p) for p in permutations(letters)}
found = False
for elem in perms:
    if d.check(elem):
        print("A possible word is:", elem)
        found = True
if not found:
    print("Can't find a word with the letters.")
Example #7
    @classmethod
    def set_dictionary_path(cls, path):
        """Additional paths to find dictionaries"""
        enchant.set_param('enchant.myspell.dictionary.path', path)
        SpellChecker.languages = [(language, locales.code_to_name(language))
                                  for language in enchant.list_languages()]
        SpellChecker._language_map = dict(SpellChecker.languages)
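
A hedged usage sketch (the SpellChecker class and locales helper come from the excerpt's surrounding module; the path is hypothetical):

SpellChecker.set_dictionary_path('/usr/share/myspell/dicts')
print(SpellChecker.languages)  # e.g. [('en_US', 'English (United States)'), ...]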
Example #8
import os

from rules import rules_back, context_rules
import numpy as np
from collections import Counter
from nltk.metrics import edit_distance
from shutil import copyfile

import enchant
# Mystem is assumed to come from pymystem3, which provides the wrapper used below
from pymystem3 import Mystem

# import matplotlib.pyplot as plt

def disable_print(*args):
    pass

def disable_pprint(*args):
    pass

mystem = Mystem()
enchant.set_param("enchant.aspell.dictionary.path", "./aspell6-ru-0.99f7-1")
if not enchant.dict_exists('ru_RU'):
    copyfile('./resources/aspell/ru_RU.dic', os.path.dirname(enchant.__file__).replace('\\', '/') + '/share/enchant/myspell/ru_RU.dic')
    copyfile('./resources/aspell/ru_RU.aff', os.path.dirname(enchant.__file__).replace('\\', '/') + '/share/enchant/myspell/ru_RU.aff')
#     res = list(''.join(o) for o in product(*d))
#     res.remove(low)



def old_process(test):
    '''
    this version includes the frequency check, aspell, hunspell, and mystem
    '''
    # load the frequency dictionary
    big_ru = {}
    with open('./Freq2011/freqrnc2011.csv') as rus:
Example #9

from pymongo import Connection, DESCENDING
from random import choice
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.stem.lancaster import LancasterStemmer
from nltk.stem import SnowballStemmer
from nltk.tokenize import RegexpTokenizer, sent_tokenize
import enchant

enchant.set_param("enchant.myspell.dictionary.path",
                  "/usr/lib/python2.6/site-packages/pyenchant-1.6.5-py2.6.egg/enchant/share/enchant/myspell")
class MongoDBSequence:
    def __init__(self, host='localhost', port=27017, db='learner', collection='news'):
        self.conn = Connection(host, port)
        self.collection = self.conn[db][collection]
        self.data = self._get_data()

    def __len__(self):
        return self.collection.count()

    def _get_data(self):
        res = self.collection.find()
        for item in res:
            yield item

    def __iter__(self):
        return self

    def next(self):
        # f = lambda d: d.get(field)
        return self.data.next()

class RandomSentence:
Example #10
# This is based on
# https://github.com/wiki-ai/editquality/blob/master/ipython/reverted_detection_demo.ipynb
# and only looks at single diffs
import json
import sys
import mwapi
import bz2

import enchant

enchant.set_param("enchant.myspell.dictionary.path",
                  r"/data/project/kokolores/dicts/usr/share/myspell/dicts/")
d = enchant.Dict("de-DE")

print("Load dataset...")
with bz2.open("../datasets/datasets/dewiki_10000.json.bz2", "rt") as datafile:
    dataset = json.load(datafile)

length = len(dataset)
print("Loaded dataset of length %d" % length)
training_set = dataset[:int(length / 1.33)]
testing_set = dataset[int(length / 1.33):]

from revscoring.features import wikitext, revision_oriented, temporal
from revscoring.languages import german

features = [
    # Catches long key mashes like kkkkkkkkkkkk
    wikitext.revision.diff.longest_repeated_char_added,
    # Measures the size of the change in added words
    wikitext.revision.diff.words_added,
Example #12
"""
Author: Kyle Martin
Email: [email protected]

Features to be used on texts

"""
import enchant
import spacy
import numpy as np
nlp = spacy.load('de')

# enchant setup
enchant.set_param('enchant.myspell.dictionary.path',
                  'dict_de')
spell_check = enchant.Dict('de_DE_frami')

def spell_checker(doc):
    """Uses an OpenOffice German dictionary and the `enchant` library;
    prints the incorrect words to the console."""
    words = [token.text for token in doc
             if not token.is_punct]

    correct_words = 0
    incorrect_words = []
    for word in words:
        if spell_check.check(word):
            correct_words += 1
        else:
            incorrect_words.append(word)
    if len(words) > 1:
Example #13
def main(args=None):
    """Run as a standalone script.

    You can simulate running the script internally by just passing a list with
    arguments in args; these will be treated as though they were the actual
    command line arguments."""
    #Determine name of current command
    if __name__ == "__main__":
        cmd_name = os.path.basename(sys.argv[0])
    elif args is not None and len(args) > 0:
        cmd_name = "welshtools %s" % args[0]
    elif len(sys.argv) > 1:
        cmd_name = "welshtools %s" % os.path.basename(sys.argv[1])
    else:
        cmd_name = __name__

    #Parse Command Line Arguments
    usage = "Usage: %s [options] SOURCE_FILE DEST_FILE" % cmd_name
    epilog = ("Reads SOURCE_FILE line by line and writes a reformatted and "
              "filtered list to DEST_FILE. This is intended to be run on the "
              "frequency lists from the Cronfa Electroneg o Gymraeg and not "
              "guaranteed to work with differently formatted input files. "
              "Please note that the SOURCE_FILE must be converted to utf-8 "
              "before running the script or the script will fail. The output "
              "file is always written in utf-8.")
    parser = OptionParser(usage=usage,
                          version="%s %s" % (cmd_name, shared.__version__),
                          epilog=epilog)
    parser.add_option(
        "-f",
        "--format",
        dest="format",
        metavar="STR",
        help=("Format to use for output file. {WORD} is replaced "
              "with the word and {FREQ} with the frequency. This "
              "can include the control characters \\\\, \\r, \\n,"
              " and \\t.  Default: \"{WORD},{FREQ}\\n\""),
        default="{WORD},{FREQ}\\n")
    parser.add_option(
        "-s",
        "--strict",
        action="store_true",
        dest="strict",
        help=("If --strict is specified, the script will not only"
              " exclude words which are found in the English "
              "dictionary or contain non-Welsh orthographic "
              "characters, but will also strip acute accents and "
              "remove contractions (e.g. 'r), and words which are"
              " hyphenated or contain j or J."))
    parser.add_option(
        "-S",
        "--summary",
        action="store_true",
        dest="print_summary",
        help=("Print a summary of how many "
              "entries were read, written and excluded at the end "
              "of the script. Note that this ignores --quiet."))
    parser.add_option(
        "-q",
        "--quiet",
        action="store_true",
        dest="quiet",
        help="Supress all command line output except for errors.")

    #Parse arguments
    if args is None:
        (opts, args) = parser.parse_args()
    else:
        (opts, args) = parser.parse_args(args)
    if len(args) != 2:
        print("Error: This command requires two arguments. Try `%s --help'." %
              cmd_name)
        return errno.EINVAL
    if opts.quiet:
        opts.verbose = False
    else:
        opts.verbose = True
    if opts.verbose:
        print("Opening source and destination files...  ", end="")
    try:
        fin = codecs.open(args[0], "r", "utf8")
        fin_size = os.path.getsize(args[0])
    except IOError as ex:
        print(
            "\nError: Could not open SOURCE_FILE (" + args[0] +
            ") for reading:", ex)
        return errno.EIO
    try:
        fout = codecs.open(args[1], "w+", "utf8")
    except IOError as ex:
        print(
            "\nError Could not open DEST_FILE (" + args[1] + ") for writing:",
            ex)
        return errno.EIO
    if opts.verbose:
        print("Done.")

    #Load dictionaries
    if opts.verbose:
        print("Loading Enchant dictionaries for en_US, en_GB and cy_GB...  ",
              end="")
    try:
        enchant.set_param("enchant.myspell.dictionary.path",
                          "./geiriadur-cy/dictionaries")
        d_us = enchant.Dict("en_US")
        d_gb = enchant.Dict("en_GB")
        d_cy = enchant.Dict("cy_GB")
    except Exception as ex:  #pylint: disable=broad-except
        print(
            "\nError: Could not open Enchant dictionaries (en_US, en_GB, cy_GB):",
            ex)
        return errno.ENOPKG
    if opts.verbose:
        print("Done.")

    #Set string of allowed characters
    welsh_chrs_strict = set('ABCDEFGHILMNOPRSTUWYabcdefghilmnoprstuwy\\/+%')
    welsh_chrs_all = set(
        ('ABCDEFGHIJLMNOPRSTUWYabcdefghijlmnoprstuwy'
         'ÄËÏÖÜẄŸäëïöüẅÿÂÊÎÔÛŴŶâêîôûŵŷÁÉÍÓÚẂÝáéíóúẃýÀÈÌÒÙẀỲàèìòùẁỳ'
         '\'-'))

    #Set mappings from CEG transcription to UTF8
    if opts.strict:
        if opts.verbose:
            print("Mapping mode: strict.")
        #Strip /, map % onto ¨, map \ onto `, and map + onto ^
        mapping = {
            '/': '',
            'a%': 'ä',
            'e%': 'ë',
            'i%': 'ï',
            'o%': 'ö',
            'u%': 'ü',
            'y%': 'ÿ',
            'w%': 'ẅ',
            'A%': 'Ä',
            'E%': 'Ë',
            'I%': 'Ï',
            'O%': 'Ö',
            'U%': 'Ü',
            'Y%': 'Ÿ',
            'W%': 'Ẅ',
            'a\\': 'à',
            'e\\': 'è',
            'i\\': 'ì',
            'o\\': 'ò',
            'u\\': 'ù',
            'y\\': 'ỳ',
            'w\\': 'ẁ',
            'A\\': 'À',
            'E\\': 'È',
            'I\\': 'Ì',
            'O\\': 'Ò',
            'U\\': 'Ù',
            'Y\\': 'Ỳ',
            'W\\': 'Ẁ',
            'a+': 'â',
            'e+': 'ê',
            'i+': 'î',
            'o+': 'ô',
            'u+': 'û',
            'y+': 'ŷ',
            'w+': 'ŵ',
            'A+': 'Â',
            'E+': 'Ê',
            'I+': 'Î',
            'O+': 'Ô',
            'U+': 'Û',
            'Y+': 'Ŷ',
            'W+': 'Ŵ'
        }
    else:
        if opts.verbose:
            print("Mapping mode: relaxed.")
        #Map / onto ´, map % onto ¨, map \ onto `, and map + onto ^
        mapping = {
            'a/': 'á',
            'e/': 'é',
            'i/': 'í',
            'o/': 'ó',
            'u/': 'ú',
            'y/': 'ý',
            'w/': 'ẃ',
            'A/': 'Á',
            'E/': 'É',
            'I/': 'Í',
            'O/': 'Ó',
            'U/': 'Ú',
            'Y/': 'Ý',
            'W/': 'Ẃ',
            'a%': 'ä',
            'e%': 'ë',
            'i%': 'ï',
            'o%': 'ö',
            'u%': 'ü',
            'y%': 'ÿ',
            'w%': 'ẅ',
            'A%': 'Ä',
            'E%': 'Ë',
            'I%': 'Ï',
            'O%': 'Ö',
            'U%': 'Ü',
            'Y%': 'Ÿ',
            'W%': 'Ẅ',
            'a\\': 'à',
            'e\\': 'è',
            'i\\': 'ì',
            'o\\': 'ò',
            'u\\': 'ù',
            'y\\': 'ỳ',
            'w\\': 'ẁ',
            'A\\': 'À',
            'E\\': 'È',
            'I\\': 'Ì',
            'O\\': 'Ò',
            'U\\': 'Ù',
            'Y\\': 'Ỳ',
            'W\\': 'Ẁ',
            'a+': 'â',
            'e+': 'ê',
            'i+': 'î',
            'o+': 'ô',
            'u+': 'û',
            'y+': 'ŷ',
            'w+': 'ŵ',
            'A+': 'Â',
            'E+': 'Ê',
            'I+': 'Î',
            'O+': 'Ô',
            'U+': 'Û',
            'Y+': 'Ŷ',
            'W+': 'Ŵ'
        }

    #Parse format string
    if opts.verbose:
        print("Format string:", '"' + opts.format + '".')
    format_mappings = {'\\\\': '\\', '\\r': "\r", '\\n': "\n", '\\t': "\t"}
    for k, v in format_mappings.items():
        opts.format = opts.format.replace(k, v)
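    # e.g. the default "{WORD},{FREQ}\n" now contains a real newline in place of the two-character escape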

    #Process files
    if opts.verbose:
        print("Processing word list...")
        shared.progress(0, fin_size)
    count_inlines = 0
    count_outlines = 0
    for line in fin:
        count_inlines += 1
        #Split line into freq and word
        (freq, word) = line.strip().split("\t")

        #IF STRICT: Skip words with hyphens and non-Welsh characters before mapping
        if opts.strict and not set(word).issubset(welsh_chrs_strict):
            continue

        #Map CEG transcriptions onto UTF8 characters.
        for k, v in mapping.items():
            word = word.replace(k, v)

        #Skip words which have non-Welsh characters after mapping
        if not set(word).issubset(welsh_chrs_all):
            continue

        #Skip words which are more than one chr and in the English dictionaries
        #unless they are in the Welsh dictionary
        if not d_cy.check(word):
            if len(word) > 1 and (d_us.check(word) or d_gb.check(word)):
                continue

        #Format word
        formatted = opts.format.format(WORD=word, FREQ=freq)

        #Write to output file
        count_outlines += 1
        fout.write(formatted)

        #Show progress
        if opts.verbose:
            shared.progress(fin.tell(), fin_size)
    if opts.verbose:
        print("\nDone.")
    if opts.print_summary:
        print("Summary:")
        print("  Entries in Source: %s" % count_inlines)
        print("  Entries in Output: %s" % count_outlines)
        print("  Excluded Entries:  %s" % (count_inlines - count_outlines))

    #Close input and output files
    fin.close()
    fout.close()

    #Return clean exit code
    return 0
Example #14

def main(args):
    with open(args.corpus_bigrams) as json_file:
        bigrams = json.load(json_file)

    # Define additional info to add to output path
    suffix = "-tok"
    sp_str = "-no_sp" if args.disable_spell_check else ""
    bigram_str = "-bi" if args.bigrams else ""
    lower_str = "-lower" if args.lower else ""
    lemma_str = "-lemma" if args.lemma else ""
    street_str = "-streets" if args.street_sub else ""
    stop_str = "-tng" if args.disable_stopwords else "" # tng = topical ngrams

    suffix += sp_str + bigram_str + lower_str + lemma_str + street_str + stop_str

    if not args.output_dir_base:
        base = args.corpus_dir if not args.filepath else os.path.dirname(args.filepath)
        output_dir = base.rstrip("/") + suffix
    else:
        output_dir = args.output_dir_base.rstrip("/") + suffix

    # Create output directory
    if not args.tsv_corpus and not os.path.exists(output_dir):
        os.mkdir(output_dir)

    if not args.filepath and not args.tsv_corpus:
        print(timestamp() + " Tokenizing data to", suffix, file=sys.stderr)

    enchant.set_param("enchant.myspell.dictionary.path", args.myspell_path)
    gb = enchant.DictWithPWL("en_GB") #, args.pwl_path) # GB isn't working, doesn't recognize 'entrancei' as "entrance i"
    gb_and_pwl = enchant.DictWithPWL("en_GB", args.pwl_path) # GB isn't working, doesn't recognize 'entrancei' as "entrance i"

    # If processing one file, don't loop!
    if args.filepath:
        if not os.path.splitext(args.filepath)[1] == ".txt":
            print(timestamp() + " Must input text file. Exiting...", file=sys.stderr)
            exit(0)
        output_file = os.path.join(output_dir, os.path.basename(args.filepath))
        if not args.overwrite and os.path.exists(output_file):
            exit(0)
        # Tokenize single file
        output = tokenize_file(args, args.filepath, gb, gb_and_pwl, bigrams)
        # Merge words if flag is set to true
        if args.merge_words:
            # Create dictionary (personal word list) out of unigrams
            pwl = enchant.request_pwl_dict(args.pwl_path)
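            # request_pwl_dict() returns a Dict backed solely by the personal word list file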

            for i, line in enumerate(output):
                output[i] = " ".join(merge_words(args, pwl, line.split(), bigrams))

        if not os.path.exists(os.path.dirname(output_file)):
            os.makedirs(os.path.dirname(output_file))
        # Write output to new file
        with open(output_file, "w") as f:
            f.write("\n".join(output))
        exit(0)
    else:
        if args.tsv_corpus:
            output_file = args.tsv_corpus[:-4] + suffix + ".tsv"
            if not args.overwrite and os.path.exists(output_file):
                print("File", output_file, "exists. Exiting...")
                exit(0)
            with open(args.tsv_corpus, 'r') as f:
                docs = f.read().split("\n")
                if docs[0].lower() == "id\tyear\ttext":
                    idx = 1
                    tsv_out = [docs[0]]
                else:
                    idx = 0
                    tsv_out = []
                for doc in docs[idx:]:
                    try:
                        id, year, text = doc.split("\t")
                    except ValueError: continue
                    tokenized = tokenize_line(args, text, gb, gb_and_pwl, bigrams)
                    tsv_out.append(id + "\t" + year + "\t" + tokenized)
            with open(output_file, "w") as f:
                f.write('\n'.join(tsv_out))
        else:
            # Compile list of files to tokenize
            files = [os.path.join(args.corpus_dir, f) for f in os.listdir(args.corpus_dir)
                     if (os.path.isfile(os.path.join(args.corpus_dir, f)) and f.endswith('.txt'))]

            for i in tqdm(range(len(files))):
                file = files[i]
                # Define path for new tokenized file
                output_file = os.path.join(output_dir, os.path.basename(file))
                if not args.overwrite and os.path.exists(output_file):
                    continue

                # Tokenize single file
                output = tokenize_file(args, file, gb, gb_and_pwl, bigrams)

                # Write output to new file
                with open(output_file, "w") as f:
                    f.write('\n'.join(output))
Example #15
    if data['curPos']:
        print('[DEBUG] Cursor position loaded', data['curPos'])
        cursor.setPosition(data['curPos'])
        window.editor.setTextCursor(cursor)
else:
    newFile()
if data.get('cnt'):
    count = data['cnt']
if data.get('ast'):
    autosaveTime = data['ast']
if not data.get('w'):
    data['w'] = 700
    data['h'] = 750
window.resize(data['w'], data['h'])
window.editor.setFixedHeight(window.height() - 45)
window.editor.setFixedWidth(window.width())
del data

with open('style.qss') as f:
    window.editor.setStyleSheet(f.read())

en.set_param("enchant.hunspell.dictionary.path", '')

##############################################################################################################################################

loadDicts()
window.show()

autosave = AutoSave()
autosave.start()
sys.exit(app.exec_())
Example #16
    enchant_version = 'enchant {}'.format(enchant.__version__)
except ImportError:
    enchant = None
    enchant_version = None

if sys.platform == 'win32x':
    # reset sys.platform
    sys.platform = 'win32'
    # using PyGObject's copy of libenchant means it won't find the
    # dictionaries installed with PyEnchant
    if enchant:
        for name in site.getsitepackages():
            dict_path = os.path.join(name, 'enchant', 'share', 'enchant',
                                     'myspell')
            if os.path.isdir(dict_path):
                enchant.set_param('enchant.myspell.dictionary.path', dict_path)
                break

from photini.pyqt import Qt, QtCore, QtGui, QtWidgets


class SpellCheck(QtCore.QObject):
    new_dict = QtCore.pyqtSignal()

    def __init__(self, *arg, **kw):
        super(SpellCheck, self).__init__(*arg, **kw)
        self.config_store = QtWidgets.QApplication.instance().config_store
        self.enable(eval(self.config_store.get('spelling', 'enabled', 'True')))
        self.set_dict(self.config_store.get('spelling', 'language'))

    @staticmethod
Example #17
import enchant


def prepare_environment():
    # Use locally installed dictionaries
    enchant.set_param("enchant.myspell.dictionary.path",
                      r"/data/project/kokolores/dicts/usr/share/myspell/dicts/")
Example #18
import string

import enchant
import nltk

import oce.logger

logger = oce.logger.getLogger(__name__)

# === Config ===
from oce.config import sge_words
from oce.config import sge_chinese_derived_words, sge_malay_derived_words
from oce.langid.constants import valid_pinyin

# === Spellcheckers ===
enchant.set_param("enchant.myspell.dictionary.path", "./lib/dict")
# --- Languages and minor variants ---
spelling_languages = {
    "en": ["en_US-large", "en_GB-large"],
    "ms": ["ms_MY"],
    "sge": [],
    "zh": []
    # "sge" and "zh" handled with personal word lists below
}
# --- Corresponding dictionaries ---
spelling_dictionaries = {}
for language in spelling_languages:
    spelling_dictionaries[language] = {}
    for variant in spelling_languages[language]:
        spelling_dictionaries[language][variant] = enchant.Dict(variant)
# --- SgE word lists ---