def get_language_tool_results(filename, file_contents, locale):
    """
    Yield ``(message, diffs, source_range)`` for every LanguageTool match.

    :param filename:      Name of the checked file (used in diffs/ranges).
    :param file_contents: Sequence of lines making up the file.
    :param locale:        Locale to check against; ``'auto'`` triggers
                          language guessing, a falsy value falls back to
                          ``'en-US'``.
    """
    text = "".join(file_contents)
    # Resolve the locale: guess on 'auto', default to 'en-US' when empty.
    if locale == 'auto':
        locale = guess_language(text)
    if not locale:
        locale = 'en-US'
    checker = LanguageTool(locale)
    for match in checker.check(text):
        if match.replacements:
            corrected = correct(text, [match]).splitlines(True)
            diffs = {filename: Diff.from_string_arrays(file_contents,
                                                       corrected)}
        else:
            diffs = None
        rule = match.ruleId
        if match.subId is not None:
            rule += '[{}]'.format(match.subId)
        yield (match.msg + ' (' + rule + ')',
               diffs,
               SourceRange.from_values(filename,
                                       match.fromy + 1, match.fromx + 1,
                                       match.toy + 1, match.tox + 1))
def process_documentation(self, parsed, locale, languagetool_disable_rules):
    """
    This fixes the parsed documentation comment by applying spell
    checking and grammatic rules via LanguageTool.

    :param parsed:
        Contains parsed documentation comment.
    :param locale:
        A locale representing the language you want to have checked.
        Default is set to 'en-US'.
    :param languagetool_disable_rules:
        List of rules to disable checks for.
    :return:
        A tuple of fixed parsed documentation comment and warning_desc.
    """
    # Defer import so the check_prerequisites can be run without
    # language_check being there.
    from language_check import LanguageTool, correct

    checker = LanguageTool(locale)
    checker.disabled.update(languagetool_disable_rules)
    # Correct each comment's description in place (namedtuple-style
    # ``_replace`` produces an updated copy).
    fixed_comments = [
        comment._replace(desc=correct(comment.desc,
                                      checker.check(comment.desc)))
        for comment in parsed
    ]
    return (fixed_comments, 'Documentation has invalid Grammar/Spelling')
def generate_post(text_models):
    """
    Generates a message for a random user based on chains

    :param text_models: A dict object with a pairing of user names and
                        Markov models.
    :return: A tuple containing the name of the user being simulated and
             the message it is to send.
    """
    # TODO: Add grammar checking using language-check module at
    # https://pypi.python.org/pypi/language-check
    # TODO: Known error: Markovify can break down if the corpus isn't long
    # enough. But if I filter out short corpus-producing users, Markovify
    # might later try to generate a chain out of a nonexisting text file
    # and error.

    # Select random chain and markov model.
    # FIX: random.sample(dict.keys(), 2)[0] raises TypeError on Python 3
    # dict views (since 3.11) and required at least two users;
    # random.choice over a list works for any non-empty dict.
    rand_user = random.choice(list(text_models))
    rand_model = text_models[rand_user]

    # Set up language_check
    tool = language_check.LanguageTool('en-US')

    # Generate random message; make_short_sentence returns None when it
    # cannot satisfy the overlap constraints, so retry until it succeeds.
    print("Generating random message...")
    message = None
    while message is None:
        message = rand_model.make_short_sentence(125,
                                                 max_overlap_ratio=0.5,
                                                 max_overlap_total=10)
    # Auto-correct grammar/spelling before returning.
    matches = tool.check(message)
    message = language_check.correct(message, matches)
    print("Message generated: ", message)
    return rand_user, message
def langcheck(input_path, output_path, replace_what, replace_with): tool = language_check.LanguageTool('en-UK') path = input_path files=glob.glob(path) for file in files: f1=open(file, 'r') filename = os.path.basename(file) outputpath = output_path filename = replace_last(filename,'e','l') text = f1.read() # print filename text.encode('utf8','ignore') #text.encode('\"', 'ignore') matches = tool.check(text) print len(matches) # q = len(matches) # for x in range(0,q): # matches[x].fromy, matches[x].fromx # print (matches[x]) # print (matches[1]) outputpath += filename temp = language_check.correct(text,matches); f2 = open(outputpath,'w') f2.write(temp.encode('utf8','ignore')); f2.close(); print filename , "is created" f1.close();
def generate_profile_description(person):
    """Build and grammar-correct a one-sentence profile description.

    ``person`` is a mapping with 'first_name', 'organization', 'title'
    and 'salary' keys; relies on the module-level LanguageTool ``tool``
    and the ``format_currency`` helper.
    """
    profile_text = ("{0} works for the {1} as an {2} making {3} per year."
                    .format(person['first_name'],
                            person['organization'],
                            person['title'],
                            format_currency(person['salary'])))
    matches = tool.check(profile_text)
    if matches:
        profile_text = language_check.correct(profile_text, matches)
    return profile_text
def run(self, filename, file,
        natural_language: str = 'auto',
        languagetool_disable_rules: typed_list(str) = (),
        ):
    """
    Checks the code with LanguageTool.

    :param natural_language:
        A locale representing the language you want to have checked.
        If set to 'auto' the language is guessed.
        If the language cannot be guessed or an unsupported language is
        guessed, 'en-US' is used.
    :param languagetool_disable_rules:
        List of rules to disable checks for.
    """
    # Defer import so the check_prerequisites can be run without
    # language_check being there.
    from language_check import LanguageTool, correct
    joined_text = ''.join(file)
    natural_language = (guess_language(joined_text)
                        if natural_language == 'auto'
                        else natural_language)

    try:
        tool = LanguageTool(natural_language, motherTongue='en_US')
    except ValueError:
        # Using 'en-US' if guessed language is not supported.
        # FIX: logging.warn is a deprecated alias; use logging.warning.
        logging.warning(
            "Changing the `natural_language` setting to 'en-US' as "
            '`language_check` failed to guess a valid language.'
        )
        natural_language = 'en-US'
        tool = LanguageTool(natural_language, motherTongue='en_US')

    tool.disabled.update(languagetool_disable_rules)
    matches = tool.check(joined_text)
    for match in matches:
        if not match.replacements:
            diffs = None
        else:
            # Apply this single match to get a per-result diff.
            replaced = correct(joined_text, [match]).splitlines(True)
            diffs = {filename: Diff.from_string_arrays(file, replaced)}

        rule_id = match.ruleId
        if match.subId is not None:
            rule_id += '[{}]'.format(match.subId)

        message = match.msg + ' (' + rule_id + ')'
        source_range = SourceRange.from_values(filename,
                                               match.fromy+1,
                                               match.fromx+1,
                                               match.toy+1,
                                               match.tox+1)
        yield Result(self, message, diffs=diffs,
                     affected_code=(source_range,))
def generateText(text, order, length):
    """Generate Markov-chain text seeded from ``text`` and return a
    grammar-corrected version.

    :param text:   Corpus used to build the order-``order`` model; its
                   first ``order`` characters seed the chain.
    :param order:  Markov chain order (fragment size in characters).
    :param length: Target length; ``length - order`` characters are
                   generated. Relies on module-level ``generateModel``,
                   ``getNextCharacter``, ``tool`` and ``language_check``.
    """
    model = generateModel(text, order)
    fragment = text[0:order]
    # FIX: collect characters in a list and join once -- repeated string
    # concatenation in a loop is quadratic.
    chars = []
    for _ in range(length - order):
        ch = getNextCharacter(model, fragment)
        chars.append(ch)
        # Slide the fragment window one character forward.
        fragment = fragment[1:] + ch
    raw = "".join(chars)
    matches = tool.check(raw)
    return language_check.correct(raw, matches)
def texts(stream):
    """Grammar-check each newline-delimited sample in ``stream``.

    Returns a list of dicts holding the original text, the number of
    suspicious matches and the corrected text. Relies on the
    module-level ``checker`` LanguageTool instance. The segment after
    the last newline is skipped (normally empty for newline-terminated
    input).
    """
    samples = []
    for line in stream.split('\n')[:-1]:
        matches = checker.check(line)
        samples.append({
            'text': line,
            'suspiciousness': len(matches),
            'correction': language_check.correct(line, matches),
        })
    return samples
def extract_title(titles):
    """Pick a random non-truncated title, replace synonyms, capitalise
    it and return a grammar-corrected version.

    Relies on module-level ``tool``, ``replacesynonym`` and
    ``unidecode``; falls back to the full ``titles`` list when every
    title contains an ellipsis.
    """
    # Keep only titles that were not truncated with "...", transliterating
    # non-str entries to plain text.
    short_titles = []
    for candidate in titles:
        if "..." in candidate:
            continue
        if type(candidate) == str:
            short_titles.append(candidate)
        else:
            short_titles.append(unidecode(candidate))
    if len(short_titles) == 0:
        short_titles = titles
    chosen = short_titles[random.randint(0, len(short_titles) - 1)]
    chosen = replacesynonym(chosen)
    chosen = string.capwords(chosen.replace("_", " "))
    matches = tool.check(chosen)
    return language_check.correct(chosen, matches)
def extract_summary_keywords(trend, urls, titles):
    """Summarize the articles at ``urls`` and extract keywords.

    :param trend:  Topic name; also used as the temporary file name.
    :param urls:   Article URLs whose text is fetched via extract_text.
    :param titles: Article titles fed to the keyword extractor.
    :return: tuple of (grammar-corrected summary, keywords)
    """
    total_articles_content = extract_text(urls)
    keywords = extract_keywords_from_all_text(total_articles_content, titles)
    # Write the article text to a temporary <trend>.txt next to this module
    # so sumy's PlaintextParser can read it, then delete the file.
    current_path = os.path.dirname(os.path.realpath(__file__))
    current_path = current_path + '\\' + trend + '.txt'  # NOTE(review): Windows-only separator
    with open(current_path, 'w') as the_file:
        the_file.write(total_articles_content)
    parser = PlaintextParser.from_file(current_path, Tokenizer(LANGUAGE))
    os.remove(current_path)
    # Concatenate the 12 highest-ranked sentences into one summary string.
    sentences = ''
    for sentence in summarizer(parser.document, 12):
        sentences = sentences + ' ' + str(sentence)
    # NOTE(review): replaced_syn is computed but never used below -- the
    # grammar check runs on the raw ``sentences``. Possibly intended to
    # check replaced_syn instead; confirm before changing.
    replaced_syn = replacesynonym(sentences)
    matches = tool.check(sentences)
    correct_summary = language_check.correct(sentences, matches)
    return correct_summary, keywords
def test_README_with_unicode(self):
    """Replicate the README example on text containing a non-ASCII
    apostrophe: two errors are detected ('a error' and 'tot he'), and
    correcting them leaves the Unicode character intact."""
    tool = language_check.LanguageTool('en-US')
    text = ('A sentence with a error in the '
            'Hitchhiker’s Guide tot he Galaxy')
    matches = tool.check(text)
    self.assertEqual(len(matches), 2)
    # First match: 'a error' at line 0, column 16 -> a/an agreement rule.
    self.assertEqual((matches[0].fromy, matches[0].fromx), (0, 16))
    self.assertEqual((matches[0].ruleId, matches[0].replacements),
                     ('EN_A_VS_AN', ['an']))
    # Second match: 'tot he' at line 0, column 50.
    self.assertEqual((matches[1].fromy, matches[1].fromx), (0, 50))
    self.assertEqual((matches[1].ruleId, matches[1].replacements),
                     ('TOT_HE', ['to the']))
    # Applying all matches yields the fully corrected sentence.
    corrected = language_check.correct(text, matches)
    self.assertEqual(corrected, 'A sentence with an error in the '
                                'Hitchhiker’s Guide to the Galaxy')
def get_language_tool_results(file_contents, locale):
    """Yield ``(message, diff, line)`` for every LanguageTool match.

    :param file_contents: Sequence of lines making up the checked text.
    :param locale:        Locale passed straight to LanguageTool.
    """
    checker = LanguageTool(locale)
    text = "".join(file_contents)
    for match in checker.check(text):
        if match.replacements:
            # Apply this single match to build a per-result diff.
            corrected = correct(text, [match]).splitlines(True)
            diff = Diff.from_string_arrays(file_contents, corrected)
        else:
            diff = None
        rule = match.ruleId
        if match.subId is not None:
            rule += '[{}]'.format(match.subId)
        message = (match.msg + ' (' + rule + ', ' +
                   _('Found at column {col}.').format(col=match.fromx+1) +
                   ')')
        yield message, diff, match.fromy+1
def run(self, filename, file,
        language: str='auto',
        languagetool_disable_rules: typed_list(str)=()):
    '''
    Checks the code with LanguageTool.

    :param language:
        A locale representing the language you want to have checked.
        If set to 'auto' the language is guessed.
        If the language cannot be guessed, 'en-US' is used.
    :param languagetool_disable_rules:
        List of rules to disable checks for.
    '''
    text = "".join(file)
    # Resolve the locale: guess on 'auto', default to 'en-US' when empty.
    if language == 'auto':
        language = guess_language(text)
    if not language:
        language = 'en-US'

    tool = LanguageTool(language, motherTongue="en_US")
    tool.disabled.update(languagetool_disable_rules)

    for match in tool.check(text):
        diffs = None
        if match.replacements:
            # Apply this single match to build a per-result diff.
            corrected = correct(text, [match]).splitlines(True)
            diffs = {filename: Diff.from_string_arrays(file, corrected)}

        rule = match.ruleId
        if match.subId is not None:
            rule = rule + '[{}]'.format(match.subId)

        affected = SourceRange.from_values(filename,
                                           match.fromy+1, match.fromx+1,
                                           match.toy+1, match.tox+1)
        yield Result(self,
                     match.msg + ' (' + rule + ')',
                     diffs=diffs,
                     affected_code=(affected,))
# NOTE(review): this chunk begins mid-method -- the ``def`` owning the next
# two statements is outside this view; it appears to collect misspelled
# words into ``error_words`` and return them. Python 2 code throughout.
        error_words.append(error.word)
        return error_words

    def select_suggestion(self):
        """Return a list with one suggested correction for every word in
        ``self.errors`` (may contain None -- see replace_word)."""
        correct_words = []
        for word in self.errors:
            print "error word: ", word
            suggestion = self.replace_word(word)
            print "suggestion: ", suggestion
            correct_words.append(suggestion)
        return correct_words

    def replace_word(self, word):
        """Return ``word`` if the dictionary accepts it, otherwise the
        dictionary's first suggestion when it is within ``self.max_dist``
        edits.

        NOTE(review): implicitly returns None when no acceptable
        suggestion exists -- confirm callers handle that.
        """
        if self.dictionary.check(word):
            return word
        suggestions = self.dictionary.suggest(word)
        if suggestions and edit_distance(word, suggestions[0]) <= self.max_dist:
            return suggestions[0]


if __name__ == "__main__":
    # Demo: spell-check a sample sentence, then run LanguageTool over it.
    sentence = "Helo! Ths is vomment."
    checker = SpellingCheck(sentence)
    print checker.select_suggestion()
    print checker.spelling_checker.get_text()
    language_tool = language_check.LanguageTool("en-US")
    matches = language_tool.check(sentence)
    sentence = language_check.correct(sentence, matches)
    print sentence
def proofread(text):
    """Grammar-correct ``text`` and return it as ASCII bytes.

    Non-ASCII characters are dropped after NFKD normalisation, so the
    result is a pure-ASCII encoded string.
    """
    checker = language_check.LanguageTool('en-US')
    corrected = language_check.correct(text, checker.check(text))
    return unicodedata.normalize('NFKD', corrected).encode('ascii', 'ignore')
def grammarchecklanguagecheck(text):
    """Run LanguageTool over ``text`` and return the corrected,
    lower-cased result."""
    checker = language_check.LanguageTool('en-US')
    problems = checker.check(text)
    corrected = language_check.correct(text, problems)
    return corrected.lower()
def language_corrector(text):
    """Return ``text`` with LanguageTool's suggested corrections applied."""
    checker = language_check.LanguageTool('en-US')
    matches = checker.check(text)
    return language_check.correct(text, matches)