예제 #1
0
def hello_world():
    """Spell-check a fixed four-word sample and describe one unknown word.

    Returns:
        A two-line string: the most likely correction on the first line and
        the collection of candidate spellings on the second. If several
        sample words are unknown, only the one visited last is reported.
    """
    checker = SpellChecker()

    # Words of the sample that the checker's dictionary does not contain.
    sample = ['something', 'is', 'hapenning', 'here']
    unknown_words = checker.unknown(sample)

    for unknown_word in unknown_words:
        best_guess = checker.correction(unknown_word)  # single best answer
        options = checker.candidates(unknown_word)     # every plausible answer

    return "\n".join((str(best_guess), str(options)))
예제 #2
0
def predict(list_boxes, img):
    """Classify every box crop of ``img``, spell-correct weak results, print them.

    Args:
        list_boxes: iterable of boxes exposing ``xmin``, ``xmax``, ``ymin``
            and ``ymax``; these are used as slice bounds into ``img``.
        img: image indexed as ``img[rows, cols]``.

    Each classification result is read as ``result[0]`` (compared against
    0.75) and ``result[1]`` (the printed text) — presumably a confidence and
    the recognised word; verify against ``tf_tests.call_classify``.
    """
    model = tf_tests.generate_model()

    results = []
    for box in list_boxes:
        # A box with zero height OR zero width produces an empty crop, so
        # both dimensions must be non-degenerate before classifying.
        if box.ymin != box.ymax and box.xmin != box.xmax:
            crop_img = img[box.ymin:box.ymax, box.xmin:box.xmax]
            results.append(tf_tests.call_classify(crop_img, model))

    spell = SpellChecker()
    sentence = []
    for result in results:
        text = result[1]
        if result[0] < 0.75:
            # correction() may come back empty-handed; keep the raw text then.
            text = spell.correction(text) or text
        sentence.append(text)

    print("Prediction:")
    for element in sentence:
        print(element)
예제 #3
0
    def fix_query_spelling(self, query):
        """Append the best correction of every unknown term to ``query``.

        The unknown terms themselves stay in the list; their corrections are
        added after the existing entries. ``query`` is modified in place.

        :param query: list of query terms
        :return: the same list object, extended with the corrections
        """
        checker = SpellChecker()
        unknown_terms = checker.unknown(query)
        query.extend(checker.correction(term) for term in unknown_terms)
        return query
예제 #4
0
파일: SpellCorrect.py 프로젝트: saoruy/DCC
    def correctListWord(self, misspelled, fig_text):
        """Match ``misspelled`` against a vocabulary built from ``fig_text``.

        :param misspelled: the word to look up
        :param fig_text: words loaded into the checker as its vocabulary
        :return: ``(word, score)`` — ``(None, 0.0)`` when ``fig_text`` is an
            empty list, ``(misspelled, 1.0)`` when the lowercased word is in
            the vocabulary, otherwise the suggested correction paired with
            ``word_probability`` of the lowercased input.
        """
        if fig_text == []:
            return (None, 0.0)

        # language=None asks for no bundled dictionary; the vocabulary is
        # whatever load_words() adds below.
        vocab_checker = SpellChecker(language=None)
        vocab_checker.word_frequency.load_words(fig_text)
        needle = misspelled.lower()

        if needle in vocab_checker:
            return (misspelled, 1.0)

        # NOTE(review): the probability is computed for the input word, not
        # for the suggestion — confirm that this is intended.
        suggestion = vocab_checker.correction(needle)
        score = vocab_checker.word_probability(needle, total_words=1)
        return (suggestion, score)
예제 #5
0
 def check(text):
     """Report the first word of ``text`` whose correction differs from it.

     Returns ``"Typo: <word>"`` for the first such word, or the placeholder
     ``"Typo:------"`` when every word equals its correction.
     """
     speller = SpellChecker()
     tokens = speller.split_words(str(text))
     # All words are corrected up front, before any comparison happens.
     fixes = [speller.correction(token) for token in tokens]
     for token, fix in zip(tokens, fixes):
         if token != fix:
             return "Typo: " + token
     return "Typo:------"
예제 #6
0
def spell_check(text):
    '''
    Spell-check a list of word tokens.

    Tokens reported as unknown by the checker are swapped for their most
    likely correction; all other tokens are copied through unchanged.
    Returns a new list; the input list is not modified.
    '''
    checker = SpellChecker()
    unknown_tokens = checker.unknown(text)
    return [
        checker.correction(token) if token in unknown_tokens else token
        for token in text
    ]
예제 #7
0
    def spellCheck(words_to_check):
        """
        Re-key a parsed query by the corrected spelling of each term.

        :param words_to_check: a parsed query {term: tf in dictionary}
        :return: new dictionary {corrected term: tf}; when two terms share
                 the same correction, the value of the later one is kept
        """
        checker = SpellChecker()
        # Register the term up front so the checker treats it as known.
        checker.word_frequency.add("coronavirus")

        return {
            checker.correction(term): words_to_check[term]
            for term in words_to_check
        }
예제 #8
0
 def test_correction(self):
     """Check English corrections for typos, known words, symbols and numbers."""
     checker = SpellChecker(language='en')

     # (input, expected correction), asserted in this order.
     cases = (
         ('ths', 'the'),
         ('ergo', 'ergo'),
         ('alot', 'a lot'),
         ('this', 'this'),
         ('-', '-'),
         ('1213', '1213'),
         ('1213.9', '1213.9'),
     )
     for given, expected in cases:
         self.assertEqual(checker.correction(given), expected)
예제 #9
0
 def test_correction(self):
     ''' Verify corrections with the default checker configuration. '''
     checker = SpellChecker()

     # (input, expected correction), asserted in this order.
     cases = [
         ('ths', 'the'),
         ('ergo', 'ergo'),
         # ('alot', 'a lot') is disabled in this variant of the test.
         ('this', 'this'),
         ('-', '-'),
         ('1213', '1213'),
         ('1213.9', '1213.9'),
     ]
     for given, expected in cases:
         self.assertEqual(checker.correction(given), expected)
예제 #10
0
def spellcheck_query(query):
    """Spell-correct a whitespace-separated query string.

    Returns:
        ``(had_misspellings, corrected_query)``: a bool telling whether the
        checker reported any unknown word, and the query re-joined with
        single spaces after unknown words were replaced by their correction.
    """
    checker = SpellChecker()
    tokens = query.split()
    unknown_tokens = list(checker.unknown(tokens))

    fixed_tokens = [
        checker.correction(token) if token in unknown_tokens else token
        for token in tokens
    ]
    return bool(unknown_tokens), ' '.join(fixed_tokens)
예제 #11
0
def correct_spelling(word):
    '''
    Normalise an exaggerated spelling and return its spell-checked form.

    Any character repeated three or more times in a row is first reduced to
    exactly two repetitions (e.g. "amazzziiing" becomes "amazziing"); the
    result is then handed to the spell checker.

    :param word: the word to correct
    :return: the checker's correction of the squeezed word
    '''
    from spellchecker import SpellChecker

    # Collapse every run of 3+ identical characters to a pair, then let the
    # checker (Norvig-style, see http://norvig.com/spell-correct.html) decide.
    squeezed = re.sub(r"(.)\1{2,}", r"\1\1", word)
    return SpellChecker().correction(squeezed)
예제 #12
0
def autocorrect(text, clean_text=True):
    """Replace unknown words in ``text`` with their most likely correction.

    Args:
        text: the text to correct.
        clean_text: when true, ``text`` is first passed through
            ``clean_string(text, remove_punctuation=False)``.

    Returns:
        The text with each unknown word replaced wherever it occurs as a
        whole word. Words the checker has no suggestion for are left as is.
    """
    if clean_text:
        text = clean_string(text, remove_punctuation=False)

    # Candidate words: punctuation is blanked out before splitting.
    words = re.sub(r'[^\w\s]', ' ', text).split()

    spell = SpellChecker()
    for word in spell.unknown(words):
        correct_word = spell.correction(word)
        if not correct_word or correct_word == word:
            continue  # nothing usable to substitute
        # Whole-word match only: a plain str.replace would also rewrite the
        # word where it appears inside a longer, correctly spelled word.
        pattern = r'(?<!\w)' + re.escape(word) + r'(?!\w)'
        # A callable replacement keeps backslashes in the suggestion literal.
        text = re.sub(pattern, lambda _match, fixed=correct_word: fixed, text)

    return text
예제 #13
0
def check(name):
    """Suggest spellings for the alphabetic pieces of ``name``.

    ``name`` is split on every non-letter character; each piece is
    spell-corrected and then segmented with ``wordninja.split``. The result
    maps each segment to the checker's candidates for it.

    Returns:
        ``str()`` of that ordered mapping, or the message of any exception
        raised along the way.
    """
    try:
        pieces = re.split('[^a-zA-Z]', name)
        checker = SpellChecker()
        suggestions = OrderedDict()

        for piece in pieces:
            fixed = checker.correction(piece)
            for segment in wordninja.split(fixed):
                suggestions[segment] = checker.candidates(segment)
        return str(suggestions)

    except Exception as e:
        # Best effort: errors are reported to the caller as text.
        return str(e)
def SpellCheck2(data):
    """Spell-correct ``data`` token by token and return the re-joined text.

    The text is tokenized with ``nltk.word_tokenize``, each token is replaced
    by the checker's correction (or kept when there is no suggestion), and
    the tokens are re-assembled with ``TreebankWordDetokenizer``.
    """
    spell = SpellChecker()
    # Extra vocabulary is registered once, before any token is checked;
    # repeating it per token only inflates these entries' counts.
    spell.word_frequency.load_words(['molded','.', '(',')'])

    Spell_Words = []
    for word in nltk.word_tokenize(data):
        correction = spell.correction(word)
        # No suggestion available: keep the token so detokenizing still works.
        Spell_Words.append(word if correction is None else correction)

    Corrected_Words = TreebankWordDetokenizer().detokenize(Spell_Words)
    return Corrected_Words
예제 #15
0
def check_spell(words):
    """Count dictionary misses in ``words`` and build a corrected essay.

    A word counts as an error each time the ``en_US`` enchant dictionary
    rejects it. Every distinct rejected word is mapped to the spell checker's
    correction and substituted via ``DataFrame.replace``.

    Returns:
        ``(err_count, essay)``: number of rejected occurrences and the words
        joined with single spaces after substitution.
    """
    d = enchant.Dict("en_US")
    spell = SpellChecker()

    rejected = [word for word in words if not d.check(word)]
    err_count = len(rejected)

    # One correction per distinct rejected word.
    corr_dict = {word: spell.correction(word) for word in set(rejected)}

    essay_df = pd.DataFrame(words)
    essay_df.replace(corr_dict, inplace=True)
    essay = ' '.join(list(essay_df[0]))
    return err_count, essay
예제 #16
0
def clean_up_sentence(sentence):
    """Tokenize ``sentence``, fix unknown words, and stem every token.

    Returns:
        List of stemmed, lowercased tokens in their original order.
    """
    spell = SpellChecker()
    # tokenize the pattern
    tokens = nltk.word_tokenize(sentence)

    # Spelling correction. A new list is built in one pass rather than
    # patching the list being iterated through repeated .index() lookups.
    misspelled = spell.unknown(tokens)
    sentence_words = []
    for token in tokens:
        if token in misspelled:
            # correction() may have no suggestion; keep the token in that
            # case so the .lower() call below cannot hit None.
            token = spell.correction(token) or token
        sentence_words.append(token)

    # stem each word
    return [stemmer.stem(word.lower()) for word in sentence_words]
예제 #17
0
    async def spellcheck(self, ctx, arg):
        """Send an embed with the best correction and all candidates for ``arg``.

        The candidate list is rendered as one string in which every entry,
        including the last, is followed by ``", "``.
        """
        checker = SpellChecker()
        best = checker.correction(arg)
        alternatives = "".join(
            f"{candidate}, " for candidate in checker.candidates(arg)
        )

        embed = discord.Embed(colour=discord.Colour.light_grey())
        embed.set_author(name=f"Spellcheck for: {arg}", icon_url=img.ImgDictionary)
        embed.add_field(name="Most likely spelling", value=best, inline=False)
        embed.add_field(name="Other possible corrections", value=alternatives, inline=False)

        await ctx.send(embed=embed)
예제 #18
0
파일: views.py 프로젝트: gian4193/ir_hw
def spell_check(request):
    """Return the (possibly corrected) ``word`` query parameter as JSON.

    GET only; any other method gets a plain-text hint. The checker's
    vocabulary is extended with every ``word`` stored in ``Word_frequency``
    before the lookup. The response body is ``[{"word": <result>}]``.
    """
    if request.method != "GET":
        return HttpResponse("please use GET")

    stored_words = [row['word'] for row in Word_frequency.objects.values('word')]
    spell = SpellChecker()
    spell.word_frequency.load_words(stored_words)

    target_word = request.GET['word']
    # Only unknown words are corrected; known words are echoed back.
    if spell.unknown([target_word]):
        target_word = spell.correction(target_word)

    return JsonResponse([{"word": target_word}], safe=False)
예제 #19
0
def spellcorrect(para):
    """Spell-correct a space-separated paragraph.

    An empty ``para`` is returned unchanged. Otherwise each piece obtained
    from splitting on single spaces is replaced by its correction when the
    checker reports it as unknown, and every piece — the last one included —
    is followed by one space in the result.
    """
    checker = SpellChecker()
    if len(para) == 0:
        return para

    pieces = para.split(' ')
    unknown_pieces = checker.unknown(pieces)
    return ''.join(
        (checker.correction(piece) if piece in unknown_pieces else piece) + ' '
        for piece in pieces
    )
예제 #20
0
def main(argv):
    """Spell-correct the first line of a text file and write it to a report.

    Options read from ``argv``:
        -h                    print usage and exit
        -t / --textPath       path of the text file to read
        -o / --outfile_path   path of the report (default: ./report.txt)

    Nothing is read or written unless a text path was supplied.
    """
    ###---        main        ---###
    t_ok = False
    textPath = ""
    outfile_path = os.path.join(os.getcwd(), 'report.txt')

    try:
        opts, _ = getopt.getopt(argv, "ht:o:", ["textPath=", "outfile_path="])
    except getopt.GetoptError:
        err()
        # If err() returns, `opts` was never bound; stop here rather than
        # fail with a NameError in the loop below.
        return

    for opt, arg in opts:
        ####################################
        if opt == '-h':
            print("#-----------------------------------#")
            print("options")
            print("t | textPath         | selected  img path")
            print("-------------------------------------")
            print("usage")
            if system == "Windows":
                print("imtote.exe -t <textPath> ")
            else:
                print("imtote -t <textPath>")
            sys.exit()
        ####################################
        if opt in ['-t', '--textPath']:
            textPath = arg
            t_ok = True
        ####################################

        if opt in ['-o', '--outfile_path']:
            outfile_path = arg

    if not t_ok:
        return

    with io.open(textPath, 'r') as text_file:
        content = text_file.readlines()

    # Only the first line of the file is processed.
    first_line = str(content[0])
    print(first_line)

    # One checker for the whole run; constructing a new one for every token
    # repeats its setup work for no benefit.
    spell = SpellChecker()
    pieces = []
    for word in word_tokenize(first_line):
        suggestion = spell.correction(word)
        # Keep the token when the checker has no suggestion for it.
        pieces.append((word if suggestion is None else suggestion) + " ")
    corrected = "".join(pieces)
    print(corrected)

    # The context manager closes the report even if the write fails.
    with open(outfile_path, "w") as report:
        report.write(corrected)
예제 #21
0
def SpellCheck(data):
    """Spell-correct ``data`` word by word, highlighting changed words in blue.

    ``data`` is split on single spaces; each piece is replaced by the
    checker's correction, wrapped with ``colored(..., 'blue')`` when it
    differs from the input. Pieces without a suggestion are kept as they
    are. The pieces are re-joined with ``TreebankWordDetokenizer``.
    """
    spell = SpellChecker()
    # Extra vocabulary is registered once, before any word is checked.
    spell.word_frequency.load_words(['molded','.', '(',')'])

    Spell_Words = []
    # The original read `words` before assigning it and called
    # `data.split_words(' ')`, which str does not provide; a plain split on
    # spaces is what the loop body expects.
    for piece in data.split(' '):
        w = Word(piece)  # presumably a str-like wrapper — TODO confirm
        suggestion = spell.correction(w)
        if suggestion is None:
            suggestion = w
        elif suggestion != w:
            suggestion = colored(suggestion, 'blue')
        Spell_Words.append(suggestion)

    Corrected_Words = TreebankWordDetokenizer().detokenize(Spell_Words)
    return Corrected_Words
예제 #22
0
def review_sent_tokenize(review, current_db_obj):
    """Split a review into spell-corrected phrases tagged with its review id.

    Each sentence found by ``nlp`` is word-tokenized, every token is replaced
    by the checker's correction, and the tokens are joined with spaces and
    passed through ``str.capitalize``.

    Returns:
        A list of dicts shaped like
        ``{'phrase': 'And i wanted to order some coffee', 'review_id': 12}``,
        one per sentence, in sentence order.
    """
    doc = nlp(review)
    spell = SpellChecker()

    phrases_objs = []
    for sent in doc.sents:
        tokens = word_tokenize(str(sent))
        phrase = ' '.join(spell.correction(token) for token in tokens).capitalize()
        phrases_objs.append(
            {'phrase': phrase, 'review_id': current_db_obj.review_id}
        )
    return phrases_objs
def spellcheck(input_path, output_path, json_path):
    """Write a spell-corrected copy of ``input_path`` to ``output_path``.

    Args:
        input_path: UTF-8 text file to read, processed line by line.
        output_path: UTF-8 text file to write; one output line per input line.
        json_path: local dictionary for the checker; when falsy the English
            dictionary is used instead.

    Each line is split on whitespace, every word is replaced by the checker's
    correction, and the words are written back joined by single spaces.
    """
    if not json_path:
        print('English spellcheck')
        spell = SpellChecker(language='en')
    else:
        spell = SpellChecker(language=None, local_dictionary=json_path)

    with open(input_path, 'r', encoding='utf-8') as i_file, \
            open(output_path, 'w', encoding='utf-8') as o_file:
        for line in tqdm(i_file):
            fixed = [spell.correction(word) for word in line.strip('\n').split()]
            print(' '.join(fixed), file=o_file)
예제 #24
0
def clean_query(query):
    '''
    Clean, lemmatize and spell-check a query.

    Returns a triple ``(lemmed, query, new_query)``: the list of cleaned
    terms, the query after apostrophe handling, and that query with every
    unknown term replaced by its correction (equal to ``query`` when the
    checker reports nothing unknown).
    '''

    # Possessive markers "'s" and "s'" are removed outright.
    query = re.sub("'s", "", query)
    query = re.sub("s'", "", query)

    # The contraction suffix n't becomes " not" (haven't -> have not).
    query = re.sub("n't", " not", query)

    # Lemmatize the non-stopword tokens, first with the default part of
    # speech and then again as verbs.
    lemmatizer = WordNetLemmatizer()
    kept_tokens = [tok for tok in word_tokenize(query) if tok not in STOPWORDS]
    lemmed = [lemmatizer.lemmatize(tok) for tok in kept_tokens]
    lemmed = [lemmatizer.lemmatize(tok, pos='v') for tok in lemmed]

    # Spell checker, optionally extended by a dictionary file found in the
    # current working directory.
    spell = SpellChecker()
    if os.path.exists(os.path.join(os.getcwd(), "MyDictionary.json")):
        spell.word_frequency.load_dictionary("MyDictionary.json")

    misspelled = spell.unknown(lemmed)
    if not misspelled:
        return lemmed, query, query

    correct_words = list(set(lemmed) - misspelled)
    unknown_terms = list(misspelled)
    # Most likely answer for each unknown term, in the same order.
    correction = [spell.correction(term) for term in unknown_terms]

    new_query = query
    for wrong, right in zip(unknown_terms, correction):
        new_query = new_query.replace(wrong, right)

    # Cleaned query: the known terms followed by the corrections.
    lemmed = correct_words + correction

    print(f"Searching for {new_query} instead of {query}")
    return lemmed, query, new_query
예제 #25
0
async def on_message(message):
    """Point out misspelled words in messages from one specific author.

    Messages from any other author are ignored entirely. For the watched
    author, each whitespace-separated token is reduced to its letters and
    spell-checked; a hint is sent to the channel for every token whose
    suggestion differs. Commands are processed afterwards.
    """
    if message.author.id != 340495492377083905:
        return

    spell = SpellChecker()
    channel = message.channel
    for raw_word in message.content.split():
        # Keep letters only, so punctuation and digits are not flagged.
        word = ''.join(filter(str.isalpha, raw_word))
        if not word:
            continue  # token had no letters at all (e.g. "123" or "!!")
        test = spell.correction(word)
        # No suggestion means nothing to report; without this guard the
        # string concatenation below would fail on None.
        if test is not None and test != word:
            await channel.send(word +
                               " is misspelled. i think it is spelled " + test)

    await bot.process_commands(message)
def correct(query):
    """Return ``query`` lowercased with each misspelled word corrected.

    For a word flagged by ``isMispelled``, a spelling candidate that also
    appears in ``queries`` is preferred (the last such candidate visited
    wins); otherwise the checker's most likely correction is used, or the
    word itself when the checker has no suggestion. Words are split on, and
    re-joined with, single spaces.
    """
    # str.lower() returns a new string; the result has to be kept.
    query = query.lower()

    spell = SpellChecker()
    corrected_tokens = []
    for word in query.split(' '):
        if not isMispelled(word):
            corrected_tokens.append(word)
            continue

        replacement = ''
        # candidates() may yield nothing at all for hopeless words.
        for candidate in spell.candidates(word) or ():
            if candidate in queries:
                replacement = candidate
        if replacement == '':
            replacement = spell.correction(word) or word
        corrected_tokens.append(replacement)

    return ' '.join(corrected_tokens)
class SpellCheck:
    """Corrects the misspelled entries of a word list in place."""

    def __init__(self, words_arr):
        """Keep the word list and create the checker used to correct it."""
        self.spell = SpellChecker()
        self.words_arr = words_arr

    def correction(self):
        """Replace unknown words in ``words_arr`` with their corrections.

        For every unknown word, the first entry that matches it exactly is
        replaced; if there is none, the first entry matching its capitalized
        form is replaced by the capitalized correction. Words without a
        suggestion, or that match no entry in either form, are left alone.

        Returns:
            The same ``words_arr`` list, modified in place.
        """
        self.misspelled = self.spell.unknown(self.words_arr)

        for word in self.misspelled:
            correct = self.spell.correction(word)
            if correct is None:
                continue  # no suggestion: keep the original entry

            try:
                self.words_arr[self.words_arr.index(word)] = correct
            except ValueError:
                # The checker may report the word in lowercase while the
                # list holds it capitalized.
                capitalized = word.capitalize()
                if capitalized in self.words_arr:
                    position = self.words_arr.index(capitalized)
                    self.words_arr[position] = correct.capitalize()

        return self.words_arr
예제 #28
0
def preprocess_reviews(reviews, labels):
  """Normalise review texts in place and filter their words by spelling.

  Entries that are empty, a single space, or not ``str`` are skipped together
  with their label. Every other entry is cleaned (punctuation, digits, URLs,
  case, stray periods); each word the spell checker does not know is replaced
  by its suggestion when that suggestion is found in ``words.words()`` and
  removed otherwise.

  Args:
    reviews: indexable, mutable sequence of review texts; the cleaned text is
      written back into it.
    labels: sequence indexed in parallel with ``reviews``.

  Returns:
    ``(comments, tags)``: the cleaned texts and the labels of the entries
    that were kept, in input order.
  """
  spell = SpellChecker()
  print("Number of observations to parse:", len(reviews))
  comments = []
  tags = []
  for i in range(len(reviews)):
    # Skip unusable entries; their labels are dropped as well.
    if reviews[i] == "" or isinstance(reviews[i], str) == False or reviews[i] == " ":
            continue
    # Progress report every 1000 indices.
    if i%1000 == 0:
        print("Update:", i)
    reviews[i] = re.sub(r'[!?]','.',reviews[i]) # Turn ! and ? into periods
    reviews[i] = re.sub(r'[^.a-zA-Z0-9\s]',' ',reviews[i]) # Replace everything except '.', letters, digits and whitespace with a space
    reviews[i] = re.sub('\'',' ',reviews[i]) # Replace single quotes with a space (the previous substitution already did this)
    reviews[i] = re.sub('#','',reviews[i]) # Drop '#' characters (likewise already replaced above)
    reviews[i] = re.sub('\d',' ',reviews[i]) # Replacing digits by space
    # Drop a lone lowercase letter, with the whitespace before it, when it is
    # followed by whitespace or a literal '$' (inside [...] the '$' is not an
    # end-of-string anchor). Runs before lower(), so capitals are unaffected.
    reviews[i] = re.sub(r'\s+[a-z][\s$]', ' ',reviews[i])
    reviews[i] = re.sub(r'\s+', ' ',reviews[i]) # Replacing more than one space with a single space
    if 'www.' in reviews[i] or 'http:' in reviews[i] or 'https:' in reviews[i] or '.com' in reviews[i]:
          # Replace every space-free run ending in '.' plus three lowercase
          # letters with the "<url>" placeholder.
          reviews[i] = re.sub(r"([^ ]+(?<=\.[a-z]{3}))", "<url>", reviews[i])
    reviews[i] = reviews[i].lower()
    reviews[i] = reviews[i].rstrip()
    spot = reviews[i].find(' .')
    while spot != -1: # Fix lone periods in comment
      # Turn " ." into ".": overwrite the space with a period and blank the
      # original period.
      sl = list(reviews[i])
      sl[spot] = '.'
      sl[spot+1] = ''
      reviews[i] = "".join(sl)
      spot = reviews[i].find(' .')
    for word in reviews[i].split():
      if word == '.':
        continue
      # Spell-check the word with all punctuation stripped.
      word_base = word.translate(str.maketrans('', '', string.punctuation))
      if(bool(spell.unknown([word_base]))):
        recommended = spell.correction(word_base)
        # words.words() is presumably the NLTK word list — TODO confirm.
        if (recommended in words.words()):
          # Accepted suggestion: swap in the first occurrence only.
          reviews[i] = reviews[i].replace(word,recommended,1)
        else:
          # Rejected: remove every occurrence of the text, including inside
          # longer words.
          reviews[i] = reviews[i].replace(word, '')
          reviews[i] = re.sub(r'\s+', ' ',reviews[i]) # Replacing more than one space with a single space
    reviews[i] = reviews[i].replace('..', '.')
    if reviews[i].find('.') == 0:
      # Text starts with a period: drop it, then drop the first space found
      # anywhere in the text.
      reviews[i] = reviews[i].replace('.', '', 1)
      reviews[i] = reviews[i].replace(' ', '', 1)
    comments.append(reviews[i])
    tags.append(labels[i])
  return comments, tags
예제 #29
0
 def _spell_check(self, words):
     """Return ``words`` with unknown tokens corrected and IoT terms unified.

     ``self.unwords`` is incremented once per unknown token. Each unknown
     token that has a suggestion is replaced wherever it occurs as a whole
     word; afterwards the standalone words ``io`` and ``iiot`` are rewritten
     to ``iot``.

     The previous version discarded the result of every ``str.replace`` call
     and therefore returned its input unchanged.
     """
     import re

     spell = SpellChecker()
     tokens = self._tokenization(words)
     for unknown in spell.unknown(tokens):
         self.unwords += 1
         suggestion = spell.correction(unknown)
         if not suggestion:
             continue  # nothing to substitute
         # Whole-word match, so the token is not rewritten inside longer words.
         pattern = r'(?<!\w)' + re.escape(unknown) + r'(?!\w)'
         words = re.sub(pattern, lambda _match, fixed=suggestion: fixed, words)

     # Whole words only: a plain substring replace of 'io' would also hit
     # 'iot' itself and any word that merely contains "io".
     words = re.sub(r'(?<!\w)io(?!\w)', 'iot', words)
     words = re.sub(r'(?<!\w)iiot(?!\w)', 'iot', words)
     return words
예제 #30
0
	def spelling_correction(self):
		"""
		Replace every word of the corpus with its spell-checked form
		(e.g. "hii -> hi"). ``self.corpus`` is updated in place, entry by
		entry; nothing is returned.
		"""

		# TODO:
		# Check whether it is only slow if it has lots of things to correct
		# Check whether it can be sped up by GPU
		# Check whether running in parallel increases speed
		# Check whether there are faster libraries

		checker = SpellChecker()
		for row, sentence in enumerate(self.corpus):
			for col, token in enumerate(sentence):
				self.corpus[row][col] = checker.correction(token)
예제 #31
0
def clear():
    """Clear the console by running the ``cls`` shell command."""
    return os.system('cls')


nxt = 0
nxts = 4
nogt = 0
noot = 1
myname = "jarvia"
commands = []
action = []

# Prompts that must match exactly, each with its fixed printed answer.
_CANNED_REPLIES = {
    "what is your favorite color": "purple",
    "who are you": "jarvia",
    "what are you": "an AI",
    "cool": "mhm",
}

print ("hello")
name = input ("what is your name>>> ")
print ("hello " + name)
while True:
    response = input("Main>>> ")
    # Spell-correct the input word by word before matching it.
    st1 = response.split()
    for x, token in enumerate(st1):
        st1[x] = spell.correction(token)
    response = " ".join(map(str, st1))

    if response in ("what time is it", 'what is the time'):
        print(time.strftime("%I")+':'+time.strftime('%M %p'))
    elif response in _CANNED_REPLIES:
        print(_CANNED_REPLIES[response])
    elif 'what can you do' in response:
        # The capability text is only stored here; nothing is printed.
        rand = ('I can do Tasks as Playing Music, Videos, Opening any file, websites,Google Search,Movie Search, Put Computer to sleep, Arithmatic Operations, Normal Conversations, Jokes and many more')
    elif 'goodbye' in response:
        # The farewell is printed, but the loop keeps running.
        print('Ok goodbye')
예제 #32
0
#!/usr/bin/env python

'''
pip install pyspellchecker
'''

from spellchecker import SpellChecker
spell = SpellChecker()

# Words of the sample sentence that the dictionary does not know.
misspelled = spell.unknown(['let', 'us', 'wlak','on','the','groun'])

for word in misspelled:
    # First line: the single most likely answer.
    # Second line: the full set of likely options.
    print(spell.correction(word), spell.candidates(word), sep="\n")