def ComputeSpellingError(rawtext, mydict):
    """Measure how many words of *rawtext* the spell checker recognises.

    The text is lower-cased and split on single spaces; every character
    outside ``a-z`` is removed from each token and tokens that end up empty
    are dropped.  One-letter tokens always count as not recognised.

    :param rawtext: text to analyse.
    :param mydict: path of the personal word list combined with ``en_US``.
    :return: ``[total, ratio]`` as strings: the number of tokens and the
        share of recognised tokens rounded to two decimals (``"0"`` when
        there are no tokens).
    """
    checker = enchant.DictWithPWL("en_US", mydict)

    pieces = (piece for piece in re.split(' ', rawtext.lower()) if piece != '')
    stripped = (re.sub(r'[^a-z]', '', piece) for piece in pieces)
    words = [word for word in stripped if word != '']

    total = len(words)
    if total > 0:
        # The length test short-circuits, so one-letter tokens are never
        # looked up in the dictionary.
        recognised = sum(
            1 for word in words if len(word) != 1 and checker.check(word)
        )
        ratio = str(round(recognised / total, 2))
    else:
        ratio = str(0)
    return [str(total), ratio]
def _getDict(cls, lang, pwl="", pel=""):
    """
    Protected class method to create a dictionary with personal word lists.

    When no word list or exclude list is given, the corresponding user
    dictionary path is used and its parent directory is created if missing.

    @param lang the language to be used as the default (string).
        The string should be in language locale format (e.g. en_US, de).
    @keyparam pwl name of the personal/project word list (string)
    @keyparam pel name of the personal/project exclude list (string)
    @return reference to the dictionary (enchant.Dict) or None, if the
        dictionary could not be created
    """
    def ensureParentDirectory(fileName):
        """Create the directory containing fileName, if it is missing."""
        parent = os.path.dirname(fileName)
        if not os.path.exists(parent):
            os.makedirs(parent)

    if not pwl:
        pwl = SpellChecker.getUserDictionaryPath()
        ensureParentDirectory(pwl)

    if not pel:
        pel = SpellChecker.getUserDictionaryPath(False)
        ensureParentDirectory(pel)

    try:
        return enchant.DictWithPWL(lang, pwl, pel)
    except Exception:
        # Catch all exceptions, because if pyenchant isn't available, you
        # can't catch the enchant.DictNotFound error.
        return None
def open_dict_file(self, fn):
    """Return the spelling dictionary backed by the word-list file fn.

    Returns None when fn or self.language is empty. A dictionary already
    cached in g.app.spellDict is returned unchanged. Otherwise the file is
    created when missing, cleaned, and merged with the dictionary for
    self.language; a plain language dictionary is the fallback.
    """
    language = self.language
    if not (fn and language):
        return None
    cached = g.app.spellDict
    if cached:
        return cached
    if not g.os_path_exists(fn):
        # Fix bug 1175013: leo/plugins/spellpyx.txt is
        # both source controlled and customized.
        self.create(fn)
    if not g.os_path_exists(fn):
        # A fallback. Unlikely to happen.
        return enchant.Dict(language)
    # Merge the local and global dictionaries.
    try:
        self.clean_dict(fn)
        return enchant.DictWithPWL(language, fn)
    except Exception:
        # Reporting the failure here was considered off-putting and
        # unnecessary, so fall back silently.
        return enchant.Dict(language)
def open_dict(self, fn, language):
    '''Open (creating it if needed) the dictionary file fn and store it in self.d.

    Does nothing when fn or language is empty.  A dictionary already held in
    g.app.spellDict is reused; otherwise the newly opened dictionary is also
    stored there so that only one copy exists.
    '''
    trace = False and not g.unitTesting
    if not (fn and language):
        return
    shared = g.app.spellDict
    if shared:
        self.d = shared
        if trace:
            g.trace('already open', self.c.fileName(), fn)
        return
    if not g.os_path_exists(fn):
        # Fix bug 1175013: leo/plugins/spellpyx.txt is both source controlled and customized.
        self.create(fn)
    spell_dict = None
    if g.os_path_exists(fn):
        # Merge the local and global dictionaries.
        try:
            self.clean_dict(fn)
            spell_dict = enchant.DictWithPWL(language, fn)
            if trace:
                g.trace('open', g.shortFileName(self.c.fileName()), fn)
        except Exception:
            g.es_exception()
            g.error('not a valid dictionary file', fn)
            spell_dict = enchant.Dict(language)
    else:
        # A fallback. Unlikely to happen.
        spell_dict = enchant.Dict(language)
    self.d = spell_dict
    # Use only a single copy of the dict.
    g.app.spellDict = self.d
def __init__(self, server):
    """Set up the spell-check module for *server*.

    Creates the config directory, loads the ``en_US`` dictionary (with the
    ``ircwords`` personal word list) and an ``en_GB`` alternate, reads the
    lock file, creates the SQLite database with its ``typos`` and
    ``settings`` tables when the database file does not exist yet, wraps
    ``dictionary.add`` so words containing a newline are rejected, and
    registers ``self.spellcheck`` on the server.
    """
    self.name = server.name
    self.server = server

    os.makedirs(server.get_config_dir(), exist_ok=True)
    self.dictionary = enchant.DictWithPWL(
        "en_US", pwl=server.get_config_dir("ircwords"))
    self.alternate = enchant.Dict("en_GB")

    lock_path = server.get_config_dir(self.LOCKFILE)
    try:
        with open(lock_path) as lock_file:
            self.locked = lock_file.read().split("\n")
    except (OSError, ValueError):
        # Missing or unreadable lock file (ValueError covers decoding
        # errors): start with nothing locked and (re)create an empty file.
        self.locked = []
        with open(lock_path, "w"):
            pass

    self.db = server.get_config_dir(self.DBFILE)
    if not os.path.exists(self.db):
        os.makedirs(server.get_config_dir(), exist_ok=True)
        # Initialise the db
        connection = sqlite3.connect(self.db)
        try:
            # "with connection" only commits/rolls back the transaction; the
            # connection itself has to be closed explicitly.
            with connection:
                connection.execute(
                    "CREATE TABLE typos (timestamp int, nick text, channel text, server text, word text);"
                )
                connection.execute(
                    "CREATE TABLE settings (server text, context text, threshhold int);"
                )
        finally:
            connection.close()

    # Keep the real ``add`` reachable and install a guarded replacement.
    # NOTE(review): presumably this stops multi-line input from injecting
    # extra lines into the personal word list -- confirm.
    self.dictionary._add = self.dictionary.add

    def guarded_add(word):
        if "\n" in word:
            return sys.__stdout__.write("f**k you.")
        return self.dictionary._add(word)

    self.dictionary.add = guarded_add

    server.spellcheck = self.spellcheck
    super().__init__(server)
def setLanguage(self, theLang, projectDict=None):
    """Load the Enchant dictionary for theLang into self.theDict.

    When projectDict is given, it is used as the personal word list of the
    dictionary; otherwise the plain language dictionary is loaded.
    """
    if projectDict is not None:
        spellDict = enchant.DictWithPWL(theLang, projectDict)
    else:
        spellDict = enchant.Dict(theLang)
    self.theDict = spellDict
    logger.debug("Enchant spell checking for language %s loaded" % theLang)
def getPWL():
    """Load and return the dictionary backed by the UMLS word list.

    The dictionary is created with ``None`` as language tag and
    ``umls_words_file_path`` as personal word list.  Progress messages are
    printed before and after loading.
    """
    # Single-argument print(...) behaves identically on Python 2 and 3,
    # unlike the print statement, which is a syntax error on Python 3.
    print("loading personal word list")
    pwl = enchant.DictWithPWL(None, umls_words_file_path)
    print("finished loading pwl")
    return pwl
def __init__(self, name):
    """Create the spell checker for language *name*.

    Falls back to the default dictionary when *name* is empty or enchant
    has no dictionary for it.  The custom dictionary path is used as the
    personal word list.
    """
    self._lang = name
    if not self._lang or not enchant.dict_exists(self._lang):
        self._lang = self.getDefaultDictionary()
    language = self._lang
    custom_words_path = self.getCustomDictionaryPath()
    self._dict = enchant.DictWithPWL(language, custom_words_path)
def correctText(text):
    """Spell-check *text* word by word and print a report.

    The text is case-folded and split on single spaces.  For every word the
    ``en_US`` dictionary (extended with ``EnglishWords.txt``) rejects, up to
    five close matches from ``data`` are printed, followed by a summary of
    the number of mistakes.
    """
    print("*" * 78)
    # Build the dictionary once instead of once per word.
    checker = enchant.DictWithPWL("en_US", "EnglishWords.txt")
    # Consecutive spaces produce empty tokens; enchant raises ValueError when
    # asked to check an empty string, so drop them.
    words = [word for word in text.casefold().split(" ") if word]

    mistakes = 0
    for word in words:
        if checker.check(word):
            continue
        mistakes += 1
        candidates = get_close_matches(word, data.keys(), n=5)
        if candidates:
            print("[-] " + word + " Is Not an English Word, Do U Mean : " +
                  ", ".join(candidates) + " ?")
        else:
            print(
                "[-] " + word +
                " Is Not an English Word, But I Don't Know What The F**k Do u Mean"
            )

    if mistakes == 0:
        print("[+] Its All Correct !")
    else:
        print("*" * 78)
        print("[-] You Have " + str(mistakes) + " Mistakes !")
def create_dict():
    """Build the ``en_US`` dictionary customised from the config file.

    Reads the comma-separated ``add`` and ``remove`` options of the
    ``[DICTIONARY]`` section in ``_config_file``.  Every name in ``add`` is
    added together with its possessive form (``name's``, plus ``name'`` when
    the name ends in ``s``); every word in ``remove`` is removed.  A missing
    section or option is treated as an empty list.

    :return: the customised dictionary.
    """
    d = enchant.DictWithPWL('en_US')

    # read extra names from config file
    config = configparser.ConfigParser()
    config.read(_config_file)

    def read_words(option):
        """Return the stripped, non-empty comma-separated entries of *option*."""
        try:
            raw = config.get('DICTIONARY', option)
        except (configparser.NoSectionError, configparser.NoOptionError):
            # No section: 'DICTIONARY', or no such option in it
            return []
        # Empty entries (empty option, trailing comma) are skipped: they used
        # to raise IndexError on name[-1].
        return [word for word in (part.strip() for part in raw.split(',')) if word]

    for name in read_words('add'):
        d.add(name)
        d.add(name + '\'s')
        if name.endswith('s'):
            d.add(name + '\'')

    for word in read_words('remove'):
        d.remove(word)

    return d
def open(self):
    """Prepare the spelling checker from the configuration.

    Leaves ``self.initialized`` False when enchant is unavailable or no
    spelling dictionary is configured.  Otherwise builds the ignore list,
    loads the dictionary (with the private word list when configured, which
    is also opened for appending), optionally prepares the set of unknown
    words, and compiles the punctuation-stripping regex.
    """
    self.initialized = False
    self.private_dict_file = None

    if enchant is None:
        return
    dict_name = self.config.spelling_dict
    if not dict_name:
        return

    # Strip each entry so that a value such as "foo, bar" yields "bar"
    # rather than " bar", which would never match a word.
    self.ignore_list = [
        word.strip() for word in self.config.spelling_ignore_words.split(",")
    ]
    # "param" appears in docstring in param description and
    # "pylint" appears in comments in pylint pragmas.
    self.ignore_list.extend(["param", "pylint"])

    private_dict_path = self.config.spelling_private_dict_file
    if private_dict_path:
        self.spelling_dict = enchant.DictWithPWL(dict_name, private_dict_path)
        # Deliberately left open: the handle is stored on the instance.
        self.private_dict_file = open(private_dict_path, "a")
    else:
        self.spelling_dict = enchant.Dict(dict_name)

    if self.config.spelling_store_unknown_words:
        self.unknown_words = set()

    # Prepare regex for stripping punctuation signs from text.
    # ' and _ are treated in a special way.
    puncts = string.punctuation.replace("'", "").replace("_", "")
    self.punctuation_regex = re.compile('[%s]' % re.escape(puncts))
    self.initialized = True
def validate_spelling(tree, filename, options):
    """
    Check spelling of text within tags.

    Every word in the text of an element (except ``a``, ``code``,
    ``monospace`` and ``pre`` elements) is checked against the ``en_US``
    dictionary extended with the VOCABULARY word list; unknown words are
    logged as warnings.
    If options['learn'], then unknown words will be appended to VOCABULARY.

    Returns True when no unknown word was found, False otherwise.
    """
    result = True
    learn = []
    # Upper-cased words already queued in ``learn`` (case-insensitive dedupe).
    queued = set()
    speller = enchant.DictWithPWL("en_US", VOCABULARY)
    if not speller:
        options['spelling'] = False
        return result
    try:
        root = tree.getroot()
        for section in root.iter():
            # NOTE(review): ``basestring`` only exists on Python 2 unless it
            # is defined elsewhere in this file -- confirm.
            if section.text and isinstance(section.tag, basestring) and \
               section.tag not in ('a', 'code', 'monospace', 'pre'):
                for word in re.findall(r"([a-zA-Z]+'?[a-zA-Z]+)", section.text):
                    if not speller.check(word):
                        key = word.upper()
                        if key not in queued:
                            queued.add(key)
                            learn.append(word)
                        result = False
                        logging.warning('Misspelled (unknown) word %s in %s',
                                        word.encode('utf-8'), filename)
    except Exception:
        # Best effort: a failure while walking the tree must not abort the
        # run, but KeyboardInterrupt/SystemExit are no longer swallowed.
        print('[-] Hmm. spell exception')
    if options['learn'] and learn:
        try:
            with open(VOCABULARY, mode='a+') as open_file:
                for word in learn:
                    open_file.write(word + '\n')
        except IOError:
            # ``open_file`` is unbound when open() itself fails, so report
            # the path instead.
            logging.error('Could not write to %s', VOCABULARY)
    return result
def __init__(self, exec_by_ibus):
    """Register the BoGo engine with IBus and set up its helpers.

    Builds the IBus component and engine description, connects to the bus,
    creates the engine factory, loads the configuration and abbreviation
    rules, and creates the spell checkers handed to the auto-corrector.

    exec_by_ibus -- when true only the bus name is requested; otherwise the
        component is registered and "bogo" is set as the global engine.
    """
    engine_name = "bogo"
    long_engine_name = "BoGo"
    author = "BoGo Development Team <*****@*****.**>"
    description = "ibus-bogo for IBus"
    version = "0.4"
    license = "GPLv3"

    # Component and engine metadata announced to IBus.
    self.component = \
        IBus.Component.new("org.freedesktop.IBus.BoGo",
                           description,
                           version,
                           license,
                           author,
                           "https://github.com/BoGoEngine/ibus-bogo",
                           "/usr/bin/exec",
                           "ibus-bogo")

    engine = IBus.EngineDesc(
        name=engine_name,
        longname=long_engine_name,
        description=description,
        language="vi",
        license=license,
        author=author,
        icon=current_path + "/data/ibus-bogo-dev.svg",
        # icon = "ibus-bogo",
        layout="default")

    self.component.add_engine(engine)
    self.mainloop = GObject.MainLoop()
    self.bus = IBus.Bus()
    self.bus.connect("disconnected", self.bus_disconnected_cb)
    self.engine_count = 0

    # The factory calls self.create_engine on "create-engine".
    self.factory = IBus.Factory.new(self.bus.get_connection())
    self.factory.connect("create-engine", self.create_engine)

    CONFIG_DIR = os.path.expanduser("~/.config/ibus-bogo/")
    self.config = Config()
    self.abbr_expander = AbbreviationExpander(config=self.config)
    # CONFIG_DIR already ends with "/", so this path contains "//".
    self.abbr_expander.watch_file(CONFIG_DIR + "/abbr_rules.json")

    if exec_by_ibus:
        self.bus.request_name("org.freedesktop.IBus.BoGo", 0)
    else:
        self.bus.register_component(self.component)
        self.bus.set_global_engine_async(
            "bogo", -1, None, None, None)

    # Dedicated broker so the myspell dictionary is looked up in DICT_PATH.
    custom_broker = enchant.Broker()
    custom_broker.set_param('enchant.myspell.dictionary.path', DICT_PATH)

    spellchecker = enchant.DictWithPWL('vi_VN_telex',
                                       pwl=PWL_PATH,
                                       broker=custom_broker)

    # FIXME: Catch enchant.errors.DictNotFoundError exception here.
    english_spellchecker = enchant.Dict('en_US')

    self.auto_corrector = AutoCorrector(self.config, spellchecker,
                                        english_spellchecker)
def open_dict_file(self, fn):
    '''Return the spelling dictionary backed by the word-list file fn.

    Returns None when fn or self.language is empty.  A dictionary already
    cached in g.app.spellDict is returned unchanged.  Otherwise the file is
    created when missing, cleaned, and merged with the dictionary for
    self.language; a plain language dictionary is the fallback.
    '''
    trace = False and not g.unitTesting
    language = self.language
    if not (fn and language):
        return None
    cached = g.app.spellDict
    if cached:
        if trace:
            g.trace('already open', self.c.fileName(), fn)
        return cached
    if not g.os_path_exists(fn):
        # Fix bug 1175013: leo/plugins/spellpyx.txt is
        # both source controlled and customized.
        self.create(fn)
    if not g.os_path_exists(fn):
        # A fallback. Unlikely to happen.
        return enchant.Dict(language)
    # Merge the local and global dictionaries.
    try:
        self.clean_dict(fn)
        d = enchant.DictWithPWL(language, fn)
        if trace:
            g.trace('open', g.shortFileName(self.c.fileName()), fn)
    except Exception:
        g.es('Error reading dictionary file', fn)
        g.es_exception()
        d = enchant.Dict(language)
    return d
def create_dict():
    """Build the ``en_US`` dictionary customised from the config file.

    Reads the comma-separated ``add`` and ``remove`` options of the
    ``[DICTIONARY]`` section in ``_config_file``.  Every name in ``add`` is
    added together with its possessive form (``name's``, plus ``name'`` when
    the name ends in ``s``); every word in ``remove`` is removed.  A missing
    section or option is treated as an empty list.

    :return: the customised dictionary.
    """
    d = enchant.DictWithPWL("en_US")

    # read extra names from config file
    config = configparser.ConfigParser()
    config.read(_config_file)

    def read_words(option):
        """Return the stripped, non-empty comma-separated entries of *option*."""
        try:
            raw = config.get("DICTIONARY", option)
        except (configparser.NoSectionError, configparser.NoOptionError):
            # No section: 'DICTIONARY', or no such option in it
            return []
        # Empty entries (empty option, trailing comma) are skipped: they used
        # to raise IndexError on name[-1].
        return [word for word in (part.strip() for part in raw.split(",")) if word]

    for name in read_words("add"):
        d.add(name)
        d.add(name + "'s")
        if name.endswith("s"):
            d.add(name + "'")

    for word in read_words("remove"):
        d.remove(word)

    return d
def check_sentence(self, sentence):
    """Return True when at most half of the words in *sentence* are known.

    A sentence for which ``self.check_URL`` returns True is never flagged
    (returns False).  Otherwise punctuation is removed, the sentence is
    split on single spaces and each word is checked against the ``en_GB``
    dictionary extended with ``slang.txt``.  Empty tokens count towards the
    total but never as valid words.
    """
    # Comparison kept as ``== True`` so that truthy non-boolean return values
    # of check_URL are treated exactly as before.
    if self.check_URL(sentence) == True:
        return False

    # Only load the dictionary once the cheap URL test has passed.
    checker = enchant.DictWithPWL("en_GB", "slang.txt")
    stripped = sentence.translate(str.maketrans('', '', string.punctuation))
    words = stripped.split(" ")

    valid_words = 0
    for word in words:
        if not word:
            # enchant raises ValueError for empty strings; skip explicitly
            # instead of relying on an exception handler.
            continue
        try:
            if checker.check(word):
                valid_words += 1
        except Exception:
            # Best effort: a word the backend cannot check counts as invalid.
            continue

    percentage = (valid_words / len(words)) * 100
    return percentage <= 50
def output_pwl(filename=Lexicon):
    """Classify the words in ``Senses`` with the ``en_US`` + *filename* dictionary.

    Prints the recognised words, then for every unknown word its suggestions
    and the first suggestion that is present in the personal word list.

    Returns the sorted list of recognised words plus the unknown words for
    which no suggestion from the personal word list was found.
    """
    spellchecker = enchant.DictWithPWL('en_US', filename)

    Valid = []
    Unknown = []
    for word in Senses:
        if spellchecker.check(word):
            Valid.append(word)
        else:
            Unknown.append(word)

    # Single-argument print(...) with %-formatting produces the same output
    # on Python 2 and Python 3 as the former print statements.
    print('Found: %s' % (Valid,))
    print('Unknown:')

    Unmatched = []
    Matched = []
    for w in Unknown:
        suggestions = spellchecker.suggest(w)
        # First suggestion that is part of the personal word list, if any.
        match = ''
        for s in suggestions:
            if spellchecker.pwl.check(s):
                match = s
                break
        print('  %s Match: "%s" %s' % (w, match, suggestions))
        if match:
            Matched.append((w, match))
        else:
            Unmatched.append(w)

    Matched.sort()
    Unmatched.sort()
    print('Matched:')
    for M in Matched:
        print(M)
    print('Unmatched %s' % (Unmatched,))

    return sorted(Valid + Unmatched)
def scanImage(chat_id, file_path): im = Image.open( file_path) #NOTE: WITH 500X500 IMG NO PROBLEM TO READ THE TEXT text = '' enchanceIndex = 1 #per fare una prova ho visto che da 12 in poi legge bene w, h = im.size im.crop((0, 0, w, h - 250)).save("temp.jpg") im2 = Image.open("temp.jpg") #.convert('L') #im2.show() ReadedText = [] enhancer = ImageEnhance.Contrast(im2) im2 = im2.filter(ImageFilter.MinFilter(3)) d = enchant.DictWithPWL("en_US", "MagicCardName.txt") while enchanceIndex <= 15: #Testing im2 = enhancer.enhance(enchanceIndex) im2 = im2.convert('1') #im2.show() text = (pytesseract.image_to_string(im2, lang='ita')) #print (text) print('\nValore contrasto= ', enchanceIndex) enchanceIndex += 1 if text != '': #ReadedText.append(text) print('\n---------Name of Cards---------\n') print('Testo rilevato ', text) print('Testi suggeriti ', d.suggest(text)) suggerimenti = d.suggest(text) if (len(suggerimenti) > 0): print('Ricerca...') for s in suggerimenti: if s == text: cardToSearch = s else: cardToSearch = suggerimenti[ 0] #quella con maggior probabilità di essere esatta print('Cerca -> ', cardToSearch) cards = Card.where(name=cardToSearch).all() if (len(cards) > 0): #for c in cards: print(cards[0].name, ' ', cards[0].cmc, cards[0].colors) send_message( str(cards[0].name) + " " + str(cards[0].cmc) + " " + str(cards[0].colors), chat_id) break else: cardsITA = Card.where(language="Italian").where( name=cardToSearch).all() if (len(cardsITA) > 0): #for c in cardsITA: print(cardsITA[0].name, ' ', ' costo= ', cardsITA[0].cmc, ' colore= ', cardsITA[0].colors) send_message( str(cardsITA[0].name) + " " + str(cardsITA[0].cmc) + " " + str(cardsITA[0].colors), chat_id) break send_message("Mi dispiace ma non sono riuscito a decifrare la foto", chat_id)
def correctword(words):
    """Return the entries of *words* that are recognised as real words.

    A word is kept when the ``en_GB`` dictionary accepts it, or when the
    ``grc_GR`` dictionary extended with the wiktionary word list accepts it.
    Empty entries are dropped.
    """
    # call build-in dictionary and self-added dictionary
    word_list_path = "/Users/lxy/PycharmProjects/data mining/enwiktionary.txt"
    d_gb = enchant.Dict("en_GB")
    # DictWithPWL loads the word list itself; the former separate
    # request_pwl_dict() call produced an object that was never used.
    d_g = enchant.DictWithPWL("grc_GR", word_list_path)
    # ``word and`` guards against empty strings, which enchant rejects with
    # ValueError.
    return [
        word for word in words
        if word and (d_gb.check(word) or d_g.check(word))
    ]
def check_nonwords(fr_words0):
    """Return the set of words in *fr_words0* the French dictionary rejects.

    Words are checked against the ``fr`` dictionary extended with
    ``hp_words_fr.txt``.  The number of rejected words is printed.
    """
    d_fr = enchant.DictWithPWL("fr", "hp_words_fr.txt")
    # still will need to add some words to a text file that are hp-specific
    non_words = {w for w in fr_words0 if not d_fr.check(w)}
    # print(...) with one argument works on both Python 2 and Python 3.
    print(len(non_words))
    return non_words
def __init__(self):
    """Set up the logger, the brand-aware English dictionary and a pretty printer.

    The dictionary is ``en_US`` extended with the brand list found at
    ``path_data`` + ``name_brand_file``.
    """
    self.logger = NsLog("log")

    self.path_data = "data/"
    self.name_brand_file = "allbrand.txt"
    brand_list_path = "{0}{1}".format(self.path_data, self.name_brand_file)
    self.dictionary_en = enchant.DictWithPWL("en_US", brand_list_path)
    #self.__file_capitalize(self.path_data, self.name_brand_file)

    self.pp = pprint.PrettyPrinter(indent=4)
def __init__(self):
    """Set up the logger, the brand-aware English dictionary and a pretty printer.

    The dictionary is ``en_US`` extended with the brand list found at
    ``path_data`` + ``name_brand_file``.
    """
    self.logger = ns_log.NsLog("log")

    self.path_data = "../data/"
    self.name_brand_file = "All_Brand.txt"
    brand_list_path = self.path_data + self.name_brand_file
    self.dictionary_en = enchant.DictWithPWL("en_US", brand_list_path)
    #self.__file_capitalize(self.path_data, self.name_brand_file)

    self.pp = pprint.PrettyPrinter(indent=4)
def fix_spelling(self):
    """Replace words that use the other country's spelling.

    Words of ``self.raw`` that the target dictionary (``en_US`` when
    ``self.country`` is "US", otherwise ``en_CA``) rejects but the other
    dictionary accepts are offered for replacement.  With ``input_type``
    "qt" the first suggestion is taken automatically; otherwise the user is
    prompted.  For "article" input the replacement is highlighted inside
    ``self.content``; otherwise ``self.safe_replace`` is called.  Each word
    is handled once; a word with no usable suggestion is left untouched.
    """
    if self.country == "US":
        dict_check = enchant.DictWithPWL("en_CA", "data/words")
        dict_correct = enchant.DictWithPWL("en_US", "data/words")
    else:
        dict_correct = enchant.DictWithPWL("en_CA", "data/words")
        dict_check = enchant.DictWithPWL("en_US", "data/words")

    # Raw string: "\w" in a normal string literal is an invalid escape.
    wordlist = re.sub(r"[^\w]", " ", self.raw).split()
    done = set()
    for word in wordlist:
        word = word.replace("_", "")
        if not word or word.isdigit() or word in done:
            continue
        if dict_correct.check(word) or not dict_check.check(word):
            continue
        done.add(word)

        new = dict_correct.suggest(word)
        if self.input_type == "qt":
            choice = "0"
        else:
            if self.country == "US":
                print("Non-American Word - *" + word + "* Replace with? ")
            else:
                print("Non-Canadian Word - *" + word + "* Replace with? ")
            for counter, option in enumerate(new):
                print(str(counter) + " - " + option)
                if counter > 10:
                    break
            print("Don't replace - q")
            choice = input("Select Replacment\n")

        if choice == "q":
            continue
        try:
            replacement = new[int(choice)]
        except (ValueError, IndexError):
            # No suggestions available (e.g. automatic "0" on an empty list)
            # or an invalid selection: leave the word unchanged.
            continue

        if self.input_type == "article":
            self.content = self.content.replace(
                str(word),
                "<font color='red'>" + str(replacement) + "</font>")
        else:
            self.safe_replace(word, replacement)
def __init__(self, lang, suggest, word_list_filename, tokenizer_lang='en_US', filters=None):
    """Create a spelling checker.

    :param lang: language tag of the dictionary to load.
    :param suggest: stored on the instance as ``self.suggest``.
    :param word_list_filename: personal word list merged into the dictionary.
    :param tokenizer_lang: language tag used to select the tokenizer.
    :param filters: optional list of tokenizer filters (defaults to none).
    """
    # Avoid a shared mutable default argument.
    if filters is None:
        filters = []
    self.dictionary = enchant.DictWithPWL(lang, word_list_filename)
    # Pass filters by keyword: the second positional parameter of
    # get_tokenizer is ``chunkers``, not ``filters``.
    self.tokenizer = get_tokenizer(tokenizer_lang, filters=filters)
    self.original_tokenizer = self.tokenizer
    self.suggest = suggest
def open(self):
    """Prepare the spelling checker from the linter's namespace options.

    Leaves ``self.initialized`` False when enchant is unavailable or no
    spelling dictionary is configured.  Otherwise builds the ignore lists,
    loads the dictionary (with the private word list when configured, which
    is also opened for appending), optionally prepares the set of unknown
    words, and creates the tokenizer.
    """
    self.initialized = False
    self.private_dict_file = None

    if enchant is None:
        return
    namespace = self.linter.namespace
    dict_name = namespace.spelling_dict
    if not dict_name:
        return

    def split_option(raw):
        """Split a comma-separated option value into stripped entries."""
        return [w.strip() for w in raw.split(",")]

    self.ignore_list = split_option(namespace.spelling_ignore_words)
    # "param" appears in docstring in param description and
    # "pylint" appears in comments in pylint pragmas.
    self.ignore_list.extend(["param", "pylint"])

    self.ignore_comment_directive_list = split_option(
        namespace.spelling_ignore_comment_directives
    )

    # Expand tilde to allow e.g. spelling-private-dict-file = ~/.pylintdict
    if namespace.spelling_private_dict_file:
        namespace.spelling_private_dict_file = os.path.expanduser(
            namespace.spelling_private_dict_file
        )

    private_dict_path = namespace.spelling_private_dict_file
    if private_dict_path:
        self.spelling_dict = enchant.DictWithPWL(dict_name, private_dict_path)
        self.private_dict_file = open(  # pylint: disable=consider-using-with
            private_dict_path, "a", encoding="utf-8"
        )
    else:
        self.spelling_dict = enchant.Dict(dict_name)

    if namespace.spelling_store_unknown_words:
        self.unknown_words = set()

    token_filters = [
        EmailFilter,
        URLFilter,
        WikiWordFilter,
        WordsWithDigitsFilter,
        WordsWithUnderscores,
        CamelCasedWord,
        SphinxDirectives,
    ]
    self.tokenizer = get_tokenizer(
        dict_name,
        chunkers=[ForwardSlashChunker],
        filters=token_filters,
    )
    self.initialized = True
def suggest(self):
    """Return enchant's spelling suggestions for ``self.word``.

    Returns None when the word contains anything other than letters a-z/A-Z,
    digits, apostrophes, hyphens, dots or whitespace, and an empty list for
    an empty word.  The ``en_US`` dictionary is extended with the large
    American English word list when that can be loaded.
    """
    if re.sub(r'[a-zA-Z\d\'\-\.\s]', '', self.word):
        return None
    if not self.word:
        # enchant raises ValueError when asked for suggestions for an empty
        # string; there is nothing to suggest.
        return []

    import enchant
    try:
        d = enchant.DictWithPWL(
            'en_US', path + '/data/spell-checker/american-english-large')
    except Exception:
        # Word list unavailable: fall back to the plain dictionary.
        d = enchant.Dict('en_US')
    return d.suggest(self.word)
def __init__(self,
             type_v,
             name_file,
             dict_root='../../dictionaries/',
             output_root='../../data/misspelled_corpora/'):
    """Store the run settings and load the dictionary.

    The dictionary is ``en_US`` extended with the word list
    ``multilabel_dic_unique_order.csv`` located under *dict_root*.  The
    output file name is ``'non_'`` followed by *type_v*.
    """
    word_list_path = dict_root + 'multilabel_dic_unique_order.csv'
    self.dictionary = enchant.DictWithPWL("en_US", word_list_path)

    self.fname = name_file
    self.type_v = type_v
    self.output_root = output_root
    self.output_fname = 'non_' + self.type_v
def _change_language(self, language: str) -> None:
    """Switch the spell-check dictionary to *language*.

    Reports an error through ``self.error`` and changes nothing when no
    language is given or enchant has no dictionary for it.  On success the
    dictionary (with ``<language>.pwl`` under ``self.pwl_path`` as personal
    word list) and the language are stored and ``rehighlight`` is emitted.
    """
    if not language:
        self.error('No language specified')
        return

    try:
        word_list = self.pwl_path / (language + '.pwl')
        new_dict = enchant.DictWithPWL(language, pwl=str(word_list))
    except enchant.errors.DictNotFoundError:
        self.error(f'Invalid language: {language}')
        return

    self.language_dict = new_dict
    self.language = language
    self.rehighlight.emit()
def main():
    """Command-line entry point: spell-check the words of a text file.

    Parses the ``in_file`` argument and the ``-d/--debug`` flag, reads the
    file, builds an ``en_US`` dictionary with the custom name "yingzi"
    added, and prints every word (split on single spaces, stripped of
    surrounding whitespace and punctuation) that the dictionary rejects,
    together with the suggested alternatives.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=DESCRIPTION,
        epilog=EPILOG)
    parser.add_argument("-d", "--debug",
                        action="store_true",
                        help="Enable Helpful Debug Messages")
    parser.add_argument("in_file",
                        nargs=1,
                        type=str,
                        help="Specify the file to check")
    args = parser.parse_args()

    # All output uses single-argument print(...), which behaves identically
    # on Python 2 and Python 3.
    debug = bool(args.debug)
    if debug:
        print("Debug Enable")

    if debug:
        print("Openning file: %s" % args.in_file[0])
    # Context manager so the input file is always closed.
    with open(args.in_file[0], 'r') as f:
        data = f.read()

    if debug:
        print("Getting an instance of a dictionary...")
    d = enchant.DictWithPWL("en_US")

    if debug:
        print("Adding a custom name")
    name = "yingzi"
    d.add(name)

    # NOTE(review): the flattened source does not show whether the two
    # "\tis ..." lines were guarded by ``debug``; they are treated as debug
    # output here -- confirm.
    if debug:
        print("Checking if the custom name we just entered is in the dictioary...")
        print("\tis %s in the personal dictionary: %s" % (name, d.is_added(name)))
    if debug:
        print("Checking if a random custom name is in the dictioary...")
        print("\tis %s in the personal dictionary: %s" % ("bob", d.is_added("bob")))

    if debug:
        print("Splitting up text in document to a list of signal words")
    words = data.split(" ")

    if debug:
        print("Go through each word and see if it is correct")
    for word in words:
        # Remove white spaces, then surrounding punctuation.  str.strip
        # replaces string.strip(), which no longer exists on Python 3.
        word = word.strip().strip(string.punctuation)
        if not word:
            # enchant raises ValueError when checking an empty string.
            continue
        if not d.check(word) and not d.is_added(word):
            print("%s is incorrect, possible alternatives:" % word)
            for w in d.suggest(word):
                print("\t%s" % w)
def __init__(self):
    """Initialise the HTML parser state and the spelling/grammar checkers.

    The spell checker uses the ``en_US`` dictionary extended with the
    ignored-words list; the grammar checker is a LanguageTool instance for
    ``en-US``.
    """
    HTMLParser.__init__(self)

    # Parsing state.
    self.__ignore_tag = False
    self.__is_code_block = False
    self.__in_code_block = False
    self.__parsed_content = ""

    # Check results.
    self.__spell_check_res = {}
    self.__grammar_check_res = None

    # Checkers.
    ignored_words_path = 'web-data/mxnet/doc/ignored_words.txt'
    self.__dictionary = enchant.DictWithPWL('en_US', ignored_words_path)
    self.__spell_checker = SpellChecker(self.__dictionary)
    self.__grammar_checker = grammar_check.LanguageTool('en-US')