def get_language_tool_results(filename, file_contents, locale):
    """
    Run LanguageTool over *file_contents* and yield one finding per match.

    :param filename:      Name of the checked file, used for diffs and
                          source ranges.
    :param file_contents: Sequence of lines making up the text to check.
    :param locale:        Locale to check against; 'auto' guesses the
                          language from the text, and a falsy guess falls
                          back to 'en-US'.
    :return:              Yields ``(message, diffs, source_range)`` tuples;
                          *diffs* is None when no replacement is offered.
    """
    text = "".join(file_contents)
    if locale == 'auto':
        locale = guess_language(text)
    if not locale:
        locale = 'en-US'

    checker = LanguageTool(locale)
    for match in checker.check(text):
        if match.replacements:
            fixed_lines = correct(text, [match]).splitlines(True)
            diffs = {filename:
                     Diff.from_string_arrays(file_contents, fixed_lines)}
        else:
            diffs = None

        rule = match.ruleId
        if match.subId is not None:
            rule += '[{}]'.format(match.subId)

        yield ('{} ({})'.format(match.msg, rule),
               diffs,
               SourceRange.from_values(filename,
                                       match.fromy + 1,
                                       match.fromx + 1,
                                       match.toy + 1,
                                       match.tox + 1))
    def process_documentation(self,
                              parsed,
                              locale,
                              languagetool_disable_rules):
        """
        Fix the parsed documentation comment by applying spell checking
        and grammar rules via LanguageTool.

        :param parsed:
            Contains parsed documentation comment.
        :param locale:
            A locale representing the language you want to have checked.
            Default is set to 'en-US'.
        :param languagetool_disable_rules:
            List of rules to disable checks for.
        :return:
            A tuple of fixed parsed documentation comment and warning_desc.
        """
        # Defer import so the check_prerequisites can be run without
        # language_check being there.
        from language_check import LanguageTool, correct

        checker = LanguageTool(locale)
        checker.disabled.update(languagetool_disable_rules)

        # Replace each comment's description with its corrected form.
        corrected = [
            comment._replace(desc=correct(comment.desc,
                                          checker.check(comment.desc)))
            for comment in parsed
        ]

        return (corrected,
                'Documentation has invalid Grammar/Spelling')
def generate_post(text_models):
    """
    Generates a message for a random user based on chains.

    :param text_models: A dict object with a pairing of user names and
                        Markov models.
    :return: A tuple containing the name of the user being simulated and the
             message it is to send.
    """
    # TODO: Known error: Markovify can break down if the corpus isn't long
    # enough. But if I filter out short corpus-producing users, Markovify
    # might later try to generate a chain out of a nonexisting text file
    # and error.

    # Select a random user and their Markov model.  random.choice over a
    # list works for any number of users; the previous
    # random.sample(keys, 2) raised ValueError with fewer than two users.
    rand_user = random.choice(list(text_models.keys()))
    rand_model = text_models[rand_user]

    # Set up language_check for grammar correction of the generated text.
    tool = language_check.LanguageTool('en-US')

    # Generate random message, retrying until the model produces one.
    print("Generating random message...")
    message = None
    while message is None:
        message = rand_model.make_short_sentence(
            125, max_overlap_ratio=0.5, max_overlap_total=10)
        if message is None:
            # make_short_sentence can fail and return None; retry instead
            # of passing None to tool.check (which would raise).
            continue
        matches = tool.check(message)
        message = language_check.correct(message, matches)
    print("Message generated: ", message)

    return rand_user, message
def langcheck(input_path, output_path, replace_what, replace_with):
	"""
	Grammar-check every file matching *input_path* and write a corrected
	copy of each into *output_path*.

	:param input_path:   Glob pattern selecting the input files.
	:param output_path:  Directory/prefix the corrected files are written to.
	:param replace_what: Unused here; kept for interface compatibility.
	:param replace_with: Unused here; kept for interface compatibility.
	"""
	# 'en-GB' is the valid LanguageTool tag for British English;
	# 'en-UK' is not a recognised locale.
	tool = language_check.LanguageTool('en-GB')
	for file in glob.glob(input_path):
		filename = replace_last(os.path.basename(file), 'e', 'l')
		# 'with' guarantees the handles are closed even if a check fails.
		with open(file, 'r') as f1:
			text = f1.read()
		# NOTE(review): the original called text.encode('utf8', 'ignore')
		# and discarded the result -- a no-op whose implicit ASCII decode
		# could itself raise on non-ASCII input, so it has been removed.
		matches = tool.check(text)
		print(len(matches))
		outputpath = output_path + filename
		temp = language_check.correct(text, matches)
		with open(outputpath, 'w') as f2:
			f2.write(temp.encode('utf8', 'ignore'))
		print(filename + " is created")
def generate_profile_description(person):
    """
    Build a one-sentence profile description for *person* and grammar-check
    it with the module-level LanguageTool instance.

    :param person: Mapping with 'first_name', 'organization', 'title' and
                   'salary' keys.
    :return: The (possibly corrected) profile sentence.
    """
    profile_text = (
        "{0} works for the {1} as an {2} making {3} per year.".format(
            person['first_name'],
            person['organization'],
            person['title'],
            format_currency(person['salary'])))
    matches = tool.check(profile_text)
    if matches:
        profile_text = language_check.correct(profile_text, matches)
    return profile_text
    def run(self,
            filename,
            file,
            natural_language: str = 'auto',
            languagetool_disable_rules: typed_list(str) = (),
            ):
        """
        Checks the code with LanguageTool.

        :param natural_language:           A locale representing the language
                                           you want to have checked. If set to
                                           'auto' the language is guessed.
                                           If the language cannot be guessed or
                                           an unsupported language is guessed,
                                           'en-US' is used.
        :param languagetool_disable_rules: List of rules to disable checks for.
        """
        # Defer import so the check_prerequisites can be run without
        # language_check being there.
        from language_check import LanguageTool, correct

        joined_text = ''.join(file)
        natural_language = (guess_language(joined_text)
                            if natural_language == 'auto'
                            else natural_language)

        try:
            tool = LanguageTool(natural_language, motherTongue='en_US')
        except ValueError:
            # Fall back to 'en-US' if the guessed language is not supported.
            # logging.warn is deprecated; logging.warning is the documented
            # spelling.
            logging.warning(
                "Changing the `natural_language` setting to 'en-US' as "
                '`language_check` failed to guess a valid language.'
            )
            natural_language = 'en-US'
            tool = LanguageTool(natural_language, motherTongue='en_US')

        tool.disabled.update(languagetool_disable_rules)
        matches = tool.check(joined_text)
        for match in matches:
            if not match.replacements:
                diffs = None
            else:
                replaced = correct(joined_text, [match]).splitlines(True)
                diffs = {filename:
                         Diff.from_string_arrays(file, replaced)}

            rule_id = match.ruleId
            if match.subId is not None:
                rule_id += '[{}]'.format(match.subId)

            message = match.msg + ' (' + rule_id + ')'
            source_range = SourceRange.from_values(filename,
                                                   match.fromy+1,
                                                   match.fromx+1,
                                                   match.toy+1,
                                                   match.tox+1)
            yield Result(self, message, diffs=diffs,
                         affected_code=(source_range,))
# Example #7
def generateText(text, order, length):
    """
    Generate Markov-chain text seeded from the first *order* characters of
    *text*, then grammar-correct the result.

    :param text:   Corpus the model is built from.
    :param order:  Markov chain order (seed fragment length).
    :param length: Total length the generated text is derived from
                   (``length - order`` characters are produced).
    :return: The grammar-corrected generated string.
    """
    model = generateModel(text, order)
    fragment = text[0:order]
    # Collect characters in a list and join once -- repeated `str +=`
    # inside a loop is quadratic in the worst case.
    chars = []
    for _ in range(length - order):
        next_char = getNextCharacter(model, fragment)
        chars.append(next_char)
        fragment = fragment[1:] + next_char
    raw = "".join(chars)
    matches = tool.check(raw)
    return language_check.correct(raw, matches)
def texts(stream):
	"""
	Spell-check each line of *stream* with the module-level ``checker``.

	:param stream: Newline-separated text; the fragment after the final
	               newline is ignored.
	:return: List of dicts with 'text', 'suspiciousness' (number of
	         findings) and 'correction' keys, one per line.
	"""
	samples = []
	for line in stream.split('\n')[:-1]:
		issues = checker.check(line)
		samples.append({
			'text': line,
			'suspiciousness': len(issues),
			'correction': language_check.correct(line, issues)
		})
	return samples
# Example #9
def extract_title(titles):
	"""
	Pick a random usable title, normalise it and grammar-correct it.

	Titles containing "..." (truncated) are skipped; non-str titles are
	transliterated with unidecode.  If nothing survives the filter, the
	original list is used as-is.

	:param titles: Sequence of candidate title strings.
	:return: The corrected, capitalised title.
	"""
	short_titles = []
	for each in titles:
		if "..." not in each:
			# isinstance is the idiomatic type check (type(x) == str
			# would reject str subclasses).
			if isinstance(each, str):
				short_titles.append(each)
			else:
				short_titles.append(unidecode(each))
	if not short_titles:
		short_titles = titles
	# random.choice replaces the manual randint-based indexing.
	title = replacesynonym(random.choice(short_titles))
	title = string.capwords(title.replace("_", " "))
	matches = tool.check(title)
	return language_check.correct(title, matches)
# Example #10
def extract_summary_keywords(trend, urls, titles):
	"""
	Summarise the articles behind *urls* and extract keywords.

	The combined article text is written to a temporary ``<trend>.txt``
	file so sumy's PlaintextParser can consume it; the file is removed
	afterwards.

	:param trend:  Topic name; used for the temporary file name.
	:param urls:   Article URLs to pull text from.
	:param titles: Titles used for keyword extraction.
	:return: Tuple of (corrected summary, keywords).
	"""
	total_articles_content = extract_text(urls)
	keywords = extract_keywords_from_all_text(total_articles_content, titles)
	# os.path.join is portable; the original hard-coded a '\\' separator.
	current_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
	                            trend + '.txt')
	with open(current_path, 'w') as the_file:
		the_file.write(total_articles_content)
	parser = PlaintextParser.from_file(current_path, Tokenizer(LANGUAGE))
	os.remove(current_path)
	sentences = ''
	for sentence in summarizer(parser.document, 12):
		sentences = sentences + ' ' + str(sentence)
	replaced_syn = replacesynonym(sentences)
	# Bug fix: the synonym-replaced text was computed but never used --
	# the grammar check previously ran on the pre-replacement text.
	matches = tool.check(replaced_syn)
	correct_summary = language_check.correct(replaced_syn, matches)
	return correct_summary, keywords
# Example #11
 def test_README_with_unicode(self):
     """Check the README example: two matches are found and corrected."""
     tool = language_check.LanguageTool('en-US')
     text = ('A sentence with a error in the '
             'Hitchhiker’s Guide tot he Galaxy')
     matches = tool.check(text)
     self.assertEqual(len(matches), 2)
     expected = [((0, 16), ('EN_A_VS_AN', ['an'])),
                 ((0, 50), ('TOT_HE', ['to the']))]
     for match, (position, rule) in zip(matches, expected):
         self.assertEqual((match.fromy, match.fromx), position)
         self.assertEqual((match.ruleId, match.replacements), rule)
     corrected = language_check.correct(text, matches)
     self.assertEqual(corrected, 'A sentence with an error in the '
                                 'Hitchhiker’s Guide to the Galaxy')
# Example #12
def get_language_tool_results(file_contents, locale):
    """
    Check *file_contents* with LanguageTool and yield one result per match.

    :param file_contents: Sequence of lines making up the text to check.
    :param locale:        Locale the text is checked against.
    :return: Yields ``(message, diff, line_number)`` tuples, where *diff*
             is None when LanguageTool offers no replacement.
    """
    checker = LanguageTool(locale)
    text = "".join(file_contents)
    for match in checker.check(text):
        if match.replacements:
            corrected_lines = correct(text, [match]).splitlines(True)
            diff = Diff.from_string_arrays(file_contents, corrected_lines)
        else:
            diff = None

        rule = match.ruleId
        if match.subId is not None:
            rule += '[{}]'.format(match.subId)

        message = (match.msg + ' (' + rule + ', ' +
                   _('Found at column {col}.').format(col=match.fromx+1) +
                   ')')
        yield message, diff, match.fromy + 1
# Example #13
    def run(self,
            filename,
            file,
            language: str='auto',
            languagetool_disable_rules: typed_list(str)=()):
        '''
        Checks the code with LanguageTool.

        :param language:                   A locale representing the language
                                           you want to have checked. If set to
                                           'auto' the language is guessed.
                                           If the language cannot be guessed,
                                           'en-US' is used.
        :param languagetool_disable_rules: List of rules to disable checks for.
        '''
        text = "".join(file)
        if language == 'auto':
            language = guess_language(text)
        if not language:
            language = 'en-US'

        tool = LanguageTool(language, motherTongue="en_US")
        tool.disabled.update(languagetool_disable_rules)

        for match in tool.check(text):
            if match.replacements:
                corrected = correct(text, [match]).splitlines(True)
                diffs = {filename: Diff.from_string_arrays(file, corrected)}
            else:
                diffs = None

            rule = match.ruleId
            if match.subId is not None:
                rule += '[{}]'.format(match.subId)

            affected = SourceRange.from_values(filename,
                                               match.fromy + 1,
                                               match.fromx + 1,
                                               match.toy + 1,
                                               match.tox + 1)
            yield Result(self,
                         match.msg + ' (' + rule + ')',
                         diffs=diffs,
                         affected_code=(affected,))
# Example #14
            error_words.append(error.word)
        return error_words

    def select_suggestion(self):
        # Build the list of best replacements for every recorded error
        # word, logging each word/suggestion pair as we go.
        suggestions = []
        for word in self.errors:
            print "error word: ", word
            fix = self.replace_word(word)
            print "suggestion: ", fix
            suggestions.append(fix)
        return suggestions

    def replace_word(self, word):
        # Return the word unchanged when the dictionary accepts it;
        # otherwise return the top suggestion when it is within
        # self.max_dist edits, and None when no close suggestion exists.
        if self.dictionary.check(word):
            return word

        candidates = self.dictionary.suggest(word)
        if not candidates:
            return None
        best = candidates[0]
        if edit_distance(word, best) <= self.max_dist:
            return best
        return None


if __name__ == "__main__":
    sentence = "Helo! Ths is vomment."
    checker = SpellingCheck(sentence)
    print checker.select_suggestion()
    print checker.spelling_checker.get_text()

    language_tool = language_check.LanguageTool("en-US")
    matches = language_tool.check(sentence)
    sentence = language_check.correct(sentence, matches)
    print sentence
# Example #15
def proofread(text):
	"""
	Grammar-correct *text* with LanguageTool, then reduce it to an
	ASCII-only form via NFKD normalisation.

	:param text: The text to proofread.
	:return: ASCII-encoded result of the corrected text.
	"""
	checker = language_check.LanguageTool('en-US')
	corrections = checker.check(text)
	fixed = language_check.correct(text, corrections)
	return unicodedata.normalize('NFKD', fixed).encode('ascii', 'ignore')
def grammarchecklanguagecheck(text):
    """Grammar-correct *text* with LanguageTool and lower-case the result."""
    checker = language_check.LanguageTool('en-US')
    corrected = language_check.correct(text, checker.check(text))
    return corrected.lower()
 def language_corrector(text):
     """Return *text* with LanguageTool's 'en-US' corrections applied."""
     tool = language_check.LanguageTool('en-US')
     issues = tool.check(text)
     return language_check.correct(text, issues)