def hello_world():
    """Spell-check a fixed sample word list and describe the result as text.

    Returns:
        str: the most likely correction of the last unknown word visited,
        a newline, then ``str()`` of that word's candidate collection.

    Raises:
        UnboundLocalError: if none of the sample words is unknown, because
        the reported values are only bound inside the loop.
    """
    checker = SpellChecker()
    sample = ['something', 'is', 'hapenning', 'here']
    # ``unknown`` yields the sample words the dictionary does not contain.
    for unknown_word in checker.unknown(sample):
        best = checker.correction(unknown_word)     # single most likely answer
        options = checker.candidates(unknown_word)  # every likely option
    return "\n".join((str(best), str(options)))
def predict(list_boxes, img):
    """Classify every boxed region of ``img`` and print the predicted words.

    Args:
        list_boxes: iterable of objects exposing ``xmin``, ``xmax``, ``ymin``
            and ``ymax``, used as slice bounds into ``img``.
        img: 2-D sliceable image (``img[y0:y1, x0:x1]``).

    Each classification result is indexed as ``res[0]`` (compared against
    0.75 — presumably a confidence score, confirm against
    ``tf_tests.call_classify``) and ``res[1]`` (the predicted word). Words
    below the threshold are passed through the spell checker.
    """
    model = tf_tests.generate_model()
    results = []
    for box in list_boxes:
        # A crop is empty as soon as EITHER dimension is zero, so both must
        # be non-degenerate before the classifier is called.
        if box.ymin == box.ymax or box.xmin == box.xmax:
            continue
        crop_img = img[box.ymin:box.ymax, box.xmin:box.xmax]
        results.append(tf_tests.call_classify(crop_img, model))

    spell = SpellChecker()
    sentence = []
    for res in results:
        if res[0] < 0.75:
            # correction() may return None when it has no suggestion;
            # keep the classifier's word in that case.
            sentence.append(spell.correction(res[1]) or res[1])
        else:
            sentence.append(res[1])

    print("Prediction:")
    for element in sentence:
        print(element)
def fix_query_spelling(self, query):
    """Append the most likely correction of each unknown term to ``query``.

    The unknown terms themselves stay in the list; corrections are added
    after them.

    Args:
        query: list of query terms. It is mutated in place.

    Returns:
        The same ``query`` list, extended with the corrections.
    """
    spell = SpellChecker()
    for word in spell.unknown(query):
        suggestion = spell.correction(word)
        # correction() may return None when no candidate exists; a None
        # entry would not be a usable query term.
        if suggestion is not None:
            query.append(suggestion)
    return query
def correctListWord(self, misspelled, fig_text):
    """Correct ``misspelled`` against a vocabulary built only from ``fig_text``.

    Args:
        misspelled: the word to check.
        fig_text: list of words used as the checker's whole dictionary.

    Returns:
        tuple: ``(None, 0.0)`` when ``fig_text`` is an empty list;
        ``(misspelled, 1.0)`` when the lower-cased word is in the vocabulary;
        otherwise ``(correction, probability)``, where the probability is
        ``word_probability`` of the lower-cased input word with
        ``total_words=1``.
    """
    if fig_text == []:
        return (None, 0.0)

    # language=None starts from an empty dictionary; only fig_text is loaded.
    checker = SpellChecker(language=None)
    checker.word_frequency.load_words(fig_text)

    lowered = misspelled.lower()
    if lowered in checker:
        return (misspelled, 1.0)

    suggestion = checker.correction(lowered)
    likelihood = checker.word_probability(lowered, total_words=1)
    return (suggestion, likelihood)
def check(text):
    """Report the first word of ``text`` whose suggested spelling differs.

    Args:
        text: any object; it is converted with ``str()`` before tokenizing.

    Returns:
        str: ``"Typo: <word>"`` for the first word that differs from its
        correction, or ``"Typo:------"`` when every word matches.
    """
    speller = SpellChecker()
    # Corrections are computed lazily: once the first typo is found there is
    # no need to correct the remaining words.
    for word in speller.split_words(str(text)):
        if speller.correction(word) != word:
            return "Typo: " + word
    return "Typo:------"
def spell_check(text):
    """Spell-check a list of word tokens.

    Args:
        text: list of word-tokenized strings.

    Returns:
        list: a new list of the same length in which every unknown word is
        replaced by its most likely correction; known words are unchanged.
    """
    spell = SpellChecker()
    misspelled = spell.unknown(text)
    corrected = []
    for word in text:
        # unknown() reports words lower-cased, so compare case-insensitively
        # or capitalised misspellings are never corrected.
        if word.lower() in misspelled:
            # correction() may return None; keep the original word then.
            corrected.append(spell.correction(word) or word)
        else:
            corrected.append(word)
    return corrected
def spellCheck(words_to_check):
    """Correct the spelling of every term in a parsed query.

    :param words_to_check: a parsed query {term: tf in dictionary}
    :return: new query dictionary keyed by the corrected terms. When several
        terms share one correction their tf values are added together
        instead of the later one overwriting the earlier one.
    """
    corrected_words_to_check = {}
    spell = SpellChecker()
    spell.word_frequency.add("coronavirus")
    for word, tf in words_to_check.items():
        # correction() may return None; keep the original term rather than
        # creating a None key.
        corrected_word = spell.correction(word) or word
        if corrected_word in corrected_words_to_check:
            corrected_words_to_check[corrected_word] += tf
        else:
            corrected_words_to_check[corrected_word] = tf
    return corrected_words_to_check
def test_correction(self):
    """Check ``correction`` against fixed inputs using the 'en' dictionary."""
    checker = SpellChecker(language='en')
    # (input, expected correction); checked in order, stopping at the first
    # failing assertion.
    expectations = (
        ('ths', 'the'),
        ('ergo', 'ergo'),
        ('alot', 'a lot'),
        ('this', 'this'),
        ('-', '-'),
        ('1213', '1213'),
        ('1213.9', '1213.9'),
    )
    for given, expected in expectations:
        self.assertEqual(checker.correction(given), expected)
def test_correction(self):
    """Test spell checker corrections with the default dictionary."""
    checker = SpellChecker()
    cases = [
        ('ths', 'the'),
        ('ergo', 'ergo'),
        # ('alot', 'a lot') is kept disabled, as in the original test.
        ('this', 'this'),
        ('-', '-'),
        ('1213', '1213'),
        ('1213.9', '1213.9'),
    ]
    for misspelt, wanted in cases:
        self.assertEqual(checker.correction(misspelt), wanted)
def spellcheck_query(query):
    """Spell-correct a whitespace-separated query string.

    Args:
        query: the raw query string.

    Returns:
        tuple: ``(misspell_boolean, corrected_query)``. The flag is True
        when at least one word is unknown to the checker; the query is
        re-joined with single spaces.
    """
    spell = SpellChecker()
    wordlist = query.split()
    misspelled = spell.unknown(wordlist)  # set of lower-cased unknown words
    corrected_wordlist = []
    for word in wordlist:
        # Compare lower-cased: unknown() never returns the original casing.
        if word.lower() in misspelled:
            # correction() may return None; fall back to the typed word.
            corrected_wordlist.append(spell.correction(word) or word)
        else:
            corrected_wordlist.append(word)
    return bool(misspelled), ' '.join(corrected_wordlist)
def correct_spelling(word):
    """Squeeze over-repeated letters in ``word`` and spell-correct the result.

    Any character repeated three or more times in a row is first reduced to
    two occurrences (e.g. 'amazzziiing' becomes 'amazziing'); the squeezed
    text is then handed to the spell checker, which follows the approach of
    Peter Norvig's article at http://norvig.com/spell-correct.html.

    :param word: the word to clean up
    :return: whatever ``SpellChecker.correction`` returns for the squeezed word
    """
    from spellchecker import SpellChecker

    # Back-reference \1 matches the captured character again; {2,} demands at
    # least two further copies, i.e. a run of three or more.
    squeezed = re.sub(r"(.)\1{2,}", r"\1\1", word)
    return SpellChecker().correction(squeezed)
def autocorrect(text, clean_text=True):
    """Replace unknown words in ``text`` with their most likely correction.

    Args:
        text: the input string.
        clean_text: when True, ``text`` is first passed through
            ``clean_string(text, remove_punctuation=False)``.

    Returns:
        str: the text with each unknown word replaced where it appears as a
        whole word.
    """
    if clean_text:
        text = clean_string(text, remove_punctuation=False)

    # Tokens are the runs of word characters left after punctuation removal.
    words = re.sub(r'[^\w\s]', ' ', text).split()
    spell = SpellChecker()
    for word in spell.unknown(words):
        correct_word = spell.correction(word)
        # correction() may return None; nothing to replace in that case.
        if not correct_word or correct_word == word:
            continue
        # Replace whole tokens only. A plain str.replace would also rewrite
        # the same letters inside longer, correctly spelled words.
        pattern = r'(?<!\w)' + re.escape(word) + r'(?!\w)'
        # A function replacement inserts the correction literally (no
        # backslash/group interpretation).
        text = re.sub(pattern, lambda _match, repl=correct_word: repl, text)
    return text
def check(name):
    """List spelling candidates for every sub-word found in ``name``.

    ``name`` is split on every non-letter character; each fragment is
    corrected, split further with ``wordninja.split``, and each resulting
    piece is mapped to its spell-checker candidates.

    Returns:
        str: ``str()`` of the ordered piece -> candidates mapping, or the
        message of any exception raised along the way.
    """
    try:
        fragments = re.split('[^a-zA-Z]', name)
        checker = SpellChecker()
        suggestions = OrderedDict()
        for fragment in fragments:
            fixed = checker.correction(fragment)
            for piece in wordninja.split(fixed):
                suggestions[piece] = checker.candidates(piece)
        return str(suggestions)
    except Exception as e:
        return str(e)
def SpellCheck2(data):
    """Spell-correct ``data`` token by token and return the re-joined text.

    The text is tokenized with ``nltk.word_tokenize``, every token is
    replaced by its most likely correction, and the tokens are joined again
    with ``TreebankWordDetokenizer``.
    """
    spell = SpellChecker()
    # Register the extra vocabulary once. Loading it inside the token loop
    # re-added the same words for every token, inflating their counts.
    spell.word_frequency.load_words(['molded', '.', '(', ')'])

    words_split = nltk.word_tokenize(data)
    # correction() may return None; keep the original token in that case so
    # detokenizing never sees a non-string.
    Spell_Words = [spell.correction(word) or word for word in words_split]
    return TreebankWordDetokenizer().detokenize(Spell_Words)
def check_spell(words):
    """Count misspelled words and build a corrected essay string.

    Args:
        words: iterable of word strings.

    Returns:
        tuple: ``(err_count, essay)``. ``err_count`` is the number of word
        occurrences rejected by the enchant ``en_US`` dictionary; ``essay``
        is the words joined by single spaces with each rejected word
        replaced by its spell-checker correction.
    """
    words = list(words)
    d = enchant.Dict("en_US")
    spell = SpellChecker()

    err_count = 0
    corr_dict = {}
    for word in words:
        if not d.check(word):
            err_count += 1
            if word not in corr_dict:
                # correction() may return None; keep the word as written.
                corr_dict[word] = spell.correction(word) or word

    # A direct lookup replaces the former DataFrame round-trip, which raised
    # KeyError on an empty word list.
    essay = ' '.join(corr_dict.get(word, word) for word in words)
    return err_count, essay
def clean_up_sentence(sentence):
    """Tokenize, spell-correct and stem ``sentence``.

    Returns:
        list: one lower-cased stem per token of the sentence.
    """
    spell = SpellChecker()
    # tokenize the pattern
    sentence_words = nltk.word_tokenize(sentence)

    # Spelling correction. unknown() returns lower-cased words, so the
    # membership test is case-insensitive. A new list is built instead of
    # editing the token list through .index() while iterating over it.
    misspelled = spell.unknown(sentence_words)
    corrected = []
    for token in sentence_words:
        if token.lower() in misspelled:
            # correction() may return None; keep the token then.
            corrected.append(spell.correction(token) or token)
        else:
            corrected.append(token)

    # stem each word
    return [stemmer.stem(word.lower()) for word in corrected]
async def spellcheck(self, ctx, arg):
    """Send an embed with the likeliest spelling of ``arg`` and alternatives.

    Args:
        ctx: invocation context; the embed is sent with ``ctx.send``.
        arg: the word to spell-check.
    """
    spell = SpellChecker()
    # Both calls may return None when the checker has no suggestion.
    main = spell.correction(arg) or arg
    candidates = spell.candidates(arg) or ()

    # Same "a, b, " layout as before. Discord rejects an empty field value,
    # so a placeholder is used when there are no candidates.
    otherstr = "".join(f"{candidate}, " for candidate in candidates) or "none"

    SpellingEmbed = discord.Embed(
        colour = discord.Colour.light_grey()
    )
    SpellingEmbed.set_author(name = f"Spellcheck for: {arg}", icon_url = img.ImgDictionary)
    SpellingEmbed.add_field(name = "Most likely spelling", value = main, inline = False)
    SpellingEmbed.add_field(name = "Other possible corrections", value = otherstr, inline = False)
    await ctx.send(embed = SpellingEmbed)
def spell_check(request):
    """Return the (possibly corrected) ``word`` query parameter as JSON.

    For a GET request the checker is extended with every ``word`` value
    stored in ``Word_frequency``; if the requested word is unknown it is
    replaced by its correction. The response is a one-element JSON list
    ``[{"word": ...}]``. Any other HTTP method receives the plain-text
    reply "please use GET".
    """
    if request.method != "GET":
        return HttpResponse("please use GET")

    stored_words = [row['word'] for row in Word_frequency.objects.values('word')]
    checker = SpellChecker()
    checker.word_frequency.load_words(stored_words)

    target_word = request.GET['word']
    if checker.unknown([target_word]):
        target_word = checker.correction(target_word)

    return JsonResponse([{"word": target_word}], safe=False)
def spellcorrect(para):
    """Spell-correct a paragraph word by word.

    Args:
        para: text whose words are separated by single spaces.

    Returns:
        str: ``para`` itself when it is empty; otherwise the corrected words
        joined by single spaces and followed by one trailing space.
    """
    if len(para) == 0:
        return para

    # Built only after the empty-input check; an empty paragraph needs no checker.
    spell = SpellChecker()
    para_list = para.split(' ')
    misspelled = spell.unknown(para_list)  # lower-cased unknown words

    corrected = []
    for word in para_list:
        if word.lower() in misspelled:
            # correction() may return None; keep the word as typed then.
            corrected.append(spell.correction(word) or word)
        else:
            corrected.append(word)
    # The trailing space is kept for compatibility with existing callers.
    return ' '.join(corrected) + ' '
def main(argv):
    """Spell-correct the first line of a text file and write it to a report.

    Options read from ``argv``:
        -h                      print usage and exit
        -t / --textPath         path of the text file to correct
        -o / --outfile_path     report path (default: ``report.txt`` in the
                                current working directory)

    Nothing is read or written unless a text path was supplied.
    """
    t_ok = False
    textPath = ""
    outfile_path = os.path.join(os.getcwd(), 'report.txt')
    try:
        opts, _ = getopt.getopt(argv, "ht:o:", ["textPath=", "outfile_path="])
    except getopt.GetoptError:
        err()
        # If err() returns instead of exiting, ``opts`` was never bound.
        return

    for opt, arg in opts:
        if opt == '-h':
            print("#-----------------------------------#")
            print("options")
            print("t | textPath | selected img path")
            print("-------------------------------------")
            print("usage")
            if system == "Windows":
                print("imtote.exe -t <textPath> ")
            else:
                print("imtote -t <textPath>")
            sys.exit()
        if opt in ['-t', '--textPath']:
            textPath = arg
            t_ok = True
        if opt in ['-o', '--outfile_path']:
            outfile_path = arg

    if not t_ok:
        return

    with io.open(textPath, 'r') as text_file:
        content = text_file.readlines()
    # Only the first line is processed; an empty file yields an empty report.
    first_line = str(content[0]) if content else ""
    print(first_line)

    # Build the checker once; constructing it per word reloads the whole
    # dictionary every time.
    spell = SpellChecker()
    corrected = "".join(
        # correction() may return None; keep the original token then.
        (spell.correction(word) or word) + " "
        for word in word_tokenize(first_line)
    )
    print(corrected)

    # ``with`` closes the report even if the write fails.
    with open(outfile_path, "w") as report:
        report.write(corrected)
def SpellCheck(data):
    """Spell-correct ``data`` and highlight every changed word in blue.

    ``data`` is split on single spaces; each token whose correction differs
    from it is replaced by the correction wrapped with
    ``colored(..., 'blue')``. The tokens are re-joined with
    ``TreebankWordDetokenizer``.

    NOTE(review): the previous body could not run. It read ``words`` before
    assigning it and called ``split_words`` on ``data``, which ``str`` does
    not provide. Splitting with ``data.split(' ')`` is the assumed intent —
    confirm. The ``Word(...)`` wrapper around each token was dropped; the
    checker is given the plain string.
    """
    spell = SpellChecker()
    # Register the extra vocabulary once rather than once per token.
    spell.word_frequency.load_words(['molded', '.', '(', ')'])

    Spell_Words = []
    for token in data.split(' '):
        # correction() may return None; treat that as "no change".
        suggestion = spell.correction(token) or token
        if suggestion != token:
            suggestion = colored(suggestion, 'blue')
        Spell_Words.append(suggestion)

    return TreebankWordDetokenizer().detokenize(Spell_Words)
def review_sent_tokenize(review, current_db_obj):
    """Split ``review`` into spell-corrected phrases tagged with a review id.

    Args:
        review: the review text; it is passed to ``nlp`` and the sentences
            are read from the result's ``sents`` attribute.
        current_db_obj: object exposing ``review_id``.

    Returns:
        list[dict]: one ``{'phrase': ..., 'review_id': ...}`` per sentence,
        e.g. ``[{'phrase': 'And i wanted to order some coffee',
        'review_id': 12}]``. Each phrase is the sentence's corrected tokens
        joined by spaces and passed through ``str.capitalize``.
    """
    doc = nlp(review)
    spell = SpellChecker()
    phrases_objs = []
    for sent in doc.sents:
        tokens = word_tokenize(str(sent))
        # correction() may return None, which ' '.join cannot handle; keep
        # the original token in that case.
        corrected = [spell.correction(token) or token for token in tokens]
        phrases_objs.append({
            'phrase': ' '.join(corrected).capitalize(),
            'review_id': current_db_obj.review_id,
        })
    return phrases_objs
def spellcheck(input_path, output_path, json_path):
    """Spell-correct a UTF-8 text file line by line into ``output_path``.

    Args:
        input_path: file to read.
        output_path: file to write; one corrected line per input line, with
            words re-joined by single spaces.
        json_path: optional local dictionary. When given it is the only
            dictionary loaded (``language=None``); otherwise the English
            dictionary is used.
    """
    if json_path:
        spell = SpellChecker(language=None, local_dictionary=json_path)
    else:
        print('English spellcheck')
        spell = SpellChecker(language='en')

    with open(input_path, 'r', encoding='utf-8') as i_file, \
            open(output_path, 'w', encoding='utf-8') as o_file:
        for line in tqdm(i_file):
            words = line.strip('\n').split()
            # correction() may return None (more likely with a small custom
            # dictionary); keep the original word then so the join works.
            correct_spell = [spell.correction(word) or word for word in words]
            print(' '.join(correct_spell), file=o_file)
def clean_query(query):
    """Clean, lemmatize and spell-check a search query.

    Steps: strip possessive ``'s`` / ``s'``, expand ``n't`` to `` not``,
    drop stop words, lemmatize (default pass, then as verbs), and finally
    spell-check the lemmas. If ``MyDictionary.json`` exists in the current
    working directory it is loaded into the checker first.

    Returns:
        tuple: ``(lemmed, query, new_query)``. ``query`` is the cleaned
        query text and ``new_query`` is the same text with unknown words
        replaced by their corrections. ``lemmed`` holds all lemmas when
        nothing is unknown; otherwise the distinct known lemmas (first-seen
        order) followed by the corrections.
    """
    # If query has apostrophe 's' replace it with ""
    query = re.sub("'s", "", query)
    query = re.sub("s'", "", query)
    # If query has n't, for eg- haven't, replace it with 'not'.
    query = re.sub("n't", " not", query)

    # One lemmatizer instance is enough for both passes.
    lemmatizer = WordNetLemmatizer()
    lemmed = [
        lemmatizer.lemmatize(word)
        for word in word_tokenize(query)
        if word not in STOPWORDS
    ]
    lemmed = [lemmatizer.lemmatize(word, pos='v') for word in lemmed]

    # Applying spell checker on query
    spell = SpellChecker()
    if os.path.exists(os.path.join(os.getcwd(), "MyDictionary.json")):
        spell.word_frequency.load_dictionary("MyDictionary.json")
    misspelled = spell.unknown(lemmed)  # lower-cased unknown lemmas

    new_query = query
    if not misspelled:
        return lemmed, query, new_query

    # Compare lower-cased, because unknown() never returns original casing;
    # otherwise a capitalised misspelling would count as a correct word.
    correct_words = [
        word for word in dict.fromkeys(lemmed)
        if word.lower() not in misspelled
    ]
    correction = []
    for word in misspelled:
        # correction() may return None; keep the word as written then.
        fixed = spell.correction(word) or word
        correction.append(fixed)
        new_query = new_query.replace(word, fixed)

    # cleaned query
    lemmed = correct_words + correction
    print(f"Searching for {new_query} instead of {query}")
    return lemmed, query, new_query
async def on_message(message):
    """Point out misspelled words in messages from one specific author.

    Messages from any other author id are ignored entirely (commands
    included). For the watched author every whitespace-separated token is
    reduced to its alphabetic characters and spell-checked; a notice is sent
    to the channel for each token whose correction differs. Afterwards the
    message is handed to ``bot.process_commands``.
    """
    if message.author.id != 340495492377083905:
        return

    spell = SpellChecker()
    channel = message.channel
    for raw in message.content.split():
        word = ''.join(filter(str.isalpha, raw))
        if not word:
            # Token had no letters at all (numbers, emoji, punctuation).
            continue
        test = spell.correction(word)
        # correction() may return None when it has no suggestion; there is
        # nothing to report then, and concatenating None would raise.
        if test is not None and test != word:
            await channel.send(word + " is misspelled. i think it is spelled " + test)

    await bot.process_commands(message)
def correct(query):
    """Lower-case ``query`` and replace each misspelled word.

    For a word flagged by ``isMispelled`` a spell-checker candidate that
    also appears in ``queries`` is preferred; if none does, the checker's
    most likely correction is used.

    Returns:
        str: the corrected words joined by single spaces.
    """
    # str.lower() returns a new string; the result must be kept.
    query = query.lower()
    spell = SpellChecker()

    fixed_tokens = []
    for word in query.split(' '):
        if not isMispelled(word):
            fixed_tokens.append(word)
            continue

        replacement = ''
        # candidates() may return None when nothing is found.
        for candidate in spell.candidates(word) or ():
            if candidate in queries:
                replacement = candidate
        if replacement == '':
            # correction() may return None; keep the typed word then.
            replacement = spell.correction(word) or word
        fixed_tokens.append(replacement)

    return ' '.join(fixed_tokens)
class SpellCheck:
    """Spell-correct a list of words in place."""

    def __init__(self, words_arr):
        """Store ``words_arr`` (the list to correct) and build the checker."""
        self.spell = SpellChecker()
        self.words_arr = words_arr

    def correction(self):
        """Replace every unknown word of ``words_arr`` with its correction.

        The unknown words are stored on ``self.misspelled``. A word written
        as ``str.capitalize`` would write it receives a capitalised
        correction; any other casing receives the correction as returned by
        the checker. Words without a suggestion are left untouched.

        Returns:
            list: the same ``words_arr`` list, modified in place.
        """
        # unknown() yields lower-cased words, so tokens are matched through
        # their lower-cased form. Walking the list by index fixes every
        # occurrence and replaces the bare ``except`` used to retry with the
        # capitalised spelling.
        self.misspelled = self.spell.unknown(self.words_arr)
        fixes = {}
        for position, token in enumerate(self.words_arr):
            lowered = token.lower()
            if lowered not in self.misspelled:
                continue
            if lowered not in fixes:
                fixes[lowered] = self.spell.correction(lowered)
            correct = fixes[lowered]
            if correct is None:
                # The checker has no suggestion for this word.
                continue
            if token != lowered and token == lowered.capitalize():
                correct = correct.capitalize()
            self.words_arr[position] = correct
        return self.words_arr
def preprocess_reviews(reviews, labels):
    """Normalise review texts and keep them alongside their labels.

    Entries that are not strings, or that equal ``""`` or ``" "``, are
    skipped. Every other entry is cleaned (punctuation, digits, URLs, lone
    periods), lower-cased and spell-checked; an unknown word is replaced by
    its correction when that correction is in ``words.words()`` and removed
    otherwise.

    Args:
        reviews: indexable, mutable sequence of review texts. Processed
            entries are overwritten in place with their cleaned form.
        labels: sequence aligned with ``reviews``.

    Returns:
        tuple: ``(comments, tags)`` — the cleaned texts and the labels of
        the entries that were processed.
    """
    spell = SpellChecker()
    strip_punctuation = str.maketrans('', '', string.punctuation)
    # Built on first use. Calling words.words() and scanning the returned
    # sequence for every unknown word repeated the same work each time.
    vocabulary = None

    print("Number of observations to parse:", len(reviews))
    comments = []
    tags = []
    for i in range(len(reviews)):
        text = reviews[i]
        if text == "" or not isinstance(text, str) or text == " ":
            continue
        if i % 1000 == 0:
            print("Update:", i)

        text = re.sub(r'[!?]', '.', text)               # sentence enders -> period
        text = re.sub(r'[^.a-zA-Z0-9\s]', ' ', text)    # other special characters -> space
        text = re.sub('\'', ' ', text)                  # quotes -> space
        text = re.sub('#', '', text)                    # drop '#'
        text = re.sub(r'\d', ' ', text)                 # digits -> space
        text = re.sub(r'\s+[a-z][\s$]', ' ', text)      # single characters and the spaces alongside
        text = re.sub(r'\s+', ' ', text)                # collapse whitespace
        if 'www.' in text or 'http:' in text or 'https:' in text or '.com' in text:
            text = re.sub(r"([^ ]+(?<=\.[a-z]{3}))", "<url>", text)
        text = text.lower()
        text = text.rstrip()

        # Fix lone periods: " ." becomes "."
        spot = text.find(' .')
        while spot != -1:
            text = text[:spot] + '.' + text[spot + 2:]
            spot = text.find(' .')

        # The word list is taken before any replacement is applied.
        for word in text.split():
            if word == '.':
                continue
            word_base = word.translate(strip_punctuation)
            if spell.unknown([word_base]):
                recommended = spell.correction(word_base)
                if vocabulary is None:
                    vocabulary = set(words.words())
                if recommended in vocabulary:
                    text = text.replace(word, recommended, 1)
                else:
                    text = text.replace(word, '')

        text = re.sub(r'\s+', ' ', text)                # collapse whitespace again
        text = text.replace('..', '.')
        if text.find('.') == 0:
            # Drop a leading period and the first space of the text.
            text = text.replace('.', '', 1)
            text = text.replace(' ', '', 1)

        reviews[i] = text
        comments.append(text)
        tags.append(labels[i])
    return comments, tags
def _spell_check(self, words):
    """Return ``words`` with unknown tokens corrected and IoT terms unified.

    Args:
        words: the text to check (used as a string — assumed ``str``,
            confirm against callers). It is tokenized with
            ``self._tokenization``.

    Returns:
        str: the corrected text. ``self.unwords`` is increased by one for
        every distinct unknown token.

    NOTE(review): the previous body discarded the result of every
    ``replace`` call, so the input was returned unchanged. Replacements are
    now applied and match whole words only; a substring replace of 'io'
    would rewrite any word that merely contains those two letters.
    """
    import re

    spell = SpellChecker()
    tokens = self._tokenization(words)
    for unknown_word in spell.unknown(tokens):
        self.unwords += 1
        suggestion = spell.correction(unknown_word)
        # correction() may return None when it has no suggestion.
        if suggestion:
            pattern = r'(?<!\w)' + re.escape(unknown_word) + r'(?!\w)'
            words = re.sub(pattern, lambda _m, repl=suggestion: repl, words)

    # Normalise the stand-alone spellings 'io' and 'iiot' to 'iot'.
    words = re.sub(r'\bio\b', 'iot', words)
    words = re.sub(r'\biiot\b', 'iot', words)
    return words
def spelling_correction(self):
    """Correct the spelling of all words in the corpus (e.g. "hii -> hi").

    ``self.corpus`` is read as a sequence of sentences, each a sequence of
    words, and is updated in place.
    """
    # TODO:
    #   Check whether it can be sped up by GPU
    #   Check whether running in parallel increases speed
    #   Check whether there are faster libraries
    spell = SpellChecker()
    # Each distinct word is corrected once; repeated words reuse the result
    # instead of calling the checker again.
    corrections = {}
    for i, sentence in enumerate(self.corpus):
        for j, word in enumerate(sentence):
            if word not in corrections:
                # correction() may return None; keep the word in that case
                # so the corpus never ends up containing None.
                corrections[word] = spell.correction(word) or word
            self.corpus[i][j] = corrections[word]
# Simple console assistant: greets the user, then answers a few fixed
# questions in a loop. Every typed word is spell-corrected first, using the
# ``spell`` checker defined outside this section.
clear = lambda: os.system('cls')
nxt = (0)
nxts = (4)
nogt = (0)
noot = (1)
myname = ("jarvia")
commands = []
action = []

print ("hello")
name = input ("what is your name>>> ")
print ("hello " + name)

while True:
    response = input("Main>>> ")
    # correction() may return None; keep the typed word then instead of
    # turning it into the text "None".
    st1 = [spell.correction(word) or word for word in response.split()]
    response = " ".join(str(x) for x in st1)

    if response == "what time is it" or response == 'what is the time':
        print(time.strftime("%I") + ':' + time.strftime('%M %p'))
    elif response == ("what is your favorite color"):
        print("purple")
    elif response == ("who are you"):
        print ("jarvia")
    elif response == "what are you":
        print ("an AI")
    elif response == ("cool"):
        print ("mhm")
    elif ('what can you do') in response:
        rand = ('I can do Tasks as Playing Music, Videos, Opening any file, websites,Google Search,Movie Search, Put Computer to sleep, Arithmatic Operations, Normal Conversations, Jokes and many more')
        # The answer used to be built but never shown to the user.
        print(rand)
    elif ('goodbye') in response:
        print('Ok goodbye')
        # Leave the prompt loop after saying goodbye.
        break
#!/usr/bin/env python
"""
pip install pyspellchecker
"""
from spellchecker import SpellChecker

spell = SpellChecker()

# Words from the sample list that the dictionary does not know.
misspelled = spell.unknown(['let', 'us', 'wlak', 'on', 'the', 'groun'])

for word in misspelled:
    # First line: the single most likely answer.
    # Second line: every likely option.
    print(spell.correction(word), spell.candidates(word), sep="\n")