def get_language_tool_results(filename, file_contents, locale):
    """
    Yield one finding per LanguageTool match in the given file contents.

    :param filename:      Name used as the key of the diff dict and passed to
                          the source range.
    :param file_contents: Sequence of strings; joined into one text for the
                          check and used as the "before" side of each diff.
    :param locale:        Language to check against. ``'auto'`` delegates to
                          ``guess_language``; a falsy result or value falls
                          back to ``'en-US'``.
    :return:              Generator of ``(message, diffs, source_range)``
                          tuples. ``diffs`` is ``None`` when the match has no
                          replacements.
    """
    text = "".join(file_contents)
    if locale == 'auto':
        locale = guess_language(text)
    if not locale:
        locale = 'en-US'

    checker = LanguageTool(locale)
    for match in checker.check(text):
        diffs = None
        if match.replacements:
            # Apply only this single match so the diff is isolated to it.
            corrected_lines = correct(text, [match]).splitlines(True)
            diffs = {
                filename: Diff.from_string_arrays(file_contents,
                                                  corrected_lines),
            }

        if match.subId is None:
            rule_id = match.ruleId
        else:
            rule_id = match.ruleId + '[{}]'.format(match.subId)

        message = match.msg + ' (' + rule_id + ')'
        # The match coordinates are shifted by one before being handed over.
        affected = SourceRange.from_values(filename,
                                           match.fromy + 1,
                                           match.fromx + 1,
                                           match.toy + 1,
                                           match.tox + 1)
        yield message, diffs, affected
Exemple #2
0
def get_language_tool_results(filename, file_contents, locale):
    """
    Check the joined file contents with LanguageTool and yield each finding.

    :param filename:      Key of the per-match diff dict; also passed to the
                          source range.
    :param file_contents: Sequence of strings making up the file.
    :param locale:        Locale for the check; ``'auto'`` asks
                          ``guess_language``, and an empty result or value
                          becomes ``'en-US'``.
    :return:              Generator yielding ``(message, diffs, source_range)``
                          where ``diffs`` is ``None`` for matches without
                          replacements.
    """
    whole_text = "".join(file_contents)
    if locale == 'auto':
        locale = guess_language(whole_text)
    locale = locale or 'en-US'

    for match in LanguageTool(locale).check(whole_text):
        if match.replacements:
            # Correct just this one match and diff against the original.
            fixed_lines = correct(whole_text, [match]).splitlines(True)
            diffs = {
                filename: Diff.from_string_arrays(file_contents, fixed_lines)
            }
        else:
            diffs = None

        rule_id = match.ruleId
        if match.subId is not None:
            rule_id = rule_id + '[{}]'.format(match.subId)

        message = match.msg + ' (' + rule_id + ')'
        location = SourceRange.from_values(
            filename,
            match.fromy + 1,
            match.fromx + 1,
            match.toy + 1,
            match.tox + 1)
        yield message, diffs, location
    def process_documentation(self,
                              parsed,
                              locale,
                              languagetool_disable_rules):
        """
        Run every description of the parsed documentation comment through
        LanguageTool and apply the corrections it suggests.

        :param parsed:
            Iterable of parsed documentation entries. Each entry provides a
            ``desc`` text and a ``_replace`` method.
        :param locale:
            Locale handed to ``LanguageTool``.
        :param languagetool_disable_rules:
            Rules added to the tool's set of disabled rules.
        :return:
            A tuple of the list of corrected entries and the warning
            description string.
        """
        # Imported lazily so check_prerequisites can run even when
        # language_check is not installed.
        from language_check import LanguageTool, correct

        checker = LanguageTool(locale)
        checker.disabled.update(languagetool_disable_rules)

        def fixed(entry):
            # Replace the description by its auto-corrected version.
            findings = checker.check(entry.desc)
            return entry._replace(desc=correct(entry.desc, findings))

        corrected = [fixed(entry) for entry in parsed]
        return (corrected,
                'Documentation has invalid Grammar/Spelling')
Exemple #4
0
    def process_documentation(self, parsed, locale,
                              languagetool_disable_rules):
        """
        Correct the descriptions of a parsed documentation comment using
        LanguageTool's spelling and grammar suggestions.

        :param parsed:
            Iterable of parsed documentation entries; each one exposes
            ``desc`` and ``_replace``.
        :param locale:
            Locale passed to ``LanguageTool``.
        :param languagetool_disable_rules:
            Rules to add to the tool's disabled set.
        :return:
            A tuple of the corrected entries (as a list) and the warning
            description string.
        """
        # Deferred import: check_prerequisites must work without
        # language_check being importable.
        from language_check import LanguageTool, correct

        language_tool = LanguageTool(locale)
        language_tool.disabled.update(languagetool_disable_rules)

        fixed_entries = []
        for entry in parsed:
            suggestions = language_tool.check(entry.desc)
            fixed_entries.append(
                entry._replace(desc=correct(entry.desc, suggestions)))

        warning_desc = 'Documentation has invalid Grammar/Spelling'
        return (fixed_entries, warning_desc)
    def run(self,
            filename,
            file,
            natural_language: str = 'auto',
            languagetool_disable_rules: typed_list(str) = (),
            ):
        """
        Checks the code with LanguageTool.

        :param natural_language:           A locale representing the language
                                           you want to have checked. If set to
                                           'auto' the language is guessed.
                                           If the language cannot be guessed or
                                           an unsupported language is guessed,
                                           'en-US' is used.
        :param languagetool_disable_rules: List of rules to disable checks for.
        """
        # Defer import so the check_prerequisites can be run without
        # language_check being there.
        from language_check import LanguageTool, correct

        joined_text = ''.join(file)
        if natural_language == 'auto':
            natural_language = guess_language(joined_text)

        try:
            tool = LanguageTool(natural_language, motherTongue='en_US')
        except ValueError:
            # Using 'en-US' if guessed language is not supported.
            # `logging.warning` replaces the deprecated `logging.warn` alias.
            logging.warning(
                "Changing the `natural_language` setting to 'en-US' as "
                '`language_check` failed to guess a valid language.'
            )
            natural_language = 'en-US'
            tool = LanguageTool(natural_language, motherTongue='en_US')

        tool.disabled.update(languagetool_disable_rules)
        matches = tool.check(joined_text)
        for match in matches:
            if not match.replacements:
                diffs = None
            else:
                # Apply only this match so each result carries its own diff.
                replaced = correct(joined_text, [match]).splitlines(True)
                diffs = {filename:
                         Diff.from_string_arrays(file, replaced)}

            rule_id = match.ruleId
            if match.subId is not None:
                rule_id += '[{}]'.format(match.subId)

            message = match.msg + ' (' + rule_id + ')'
            # Match coordinates are shifted by one for the source range.
            source_range = SourceRange.from_values(filename,
                                                   match.fromy+1,
                                                   match.fromx+1,
                                                   match.toy+1,
                                                   match.tox+1)
            yield Result(self, message, diffs=diffs,
                         affected_code=(source_range,))
 def __commas(self, obj):
     # Score = (number of LanguageTool "ru-RU" matches in the stored text)
     # divided by obj.get_totalword(), doubled, and capped at 1.0 once the
     # raw ratio reaches 0.5.
     checker = LanguageTool("ru-RU")
     ratio = len(checker.check(self.__txt)) / obj.get_totalword()
     self.__value = 1.0 if ratio >= 0.5 else ratio * 2
    def run(self,
            filename,
            file,
            natural_language: str='auto',
            languagetool_disable_rules: typed_list(str)=()):
        '''
        Checks the code with LanguageTool.

        :param natural_language:           A locale representing the language
                                           you want to have checked. If set to
                                           'auto' the language is guessed.
                                           If the language cannot be guessed or
                                           an unsupported language is guessed,
                                           'en-US' is used.
        :param languagetool_disable_rules: List of rules to disable checks for.
        '''
        # Defer import so the check_prerequisites can be run without
        # language_check being there.
        from language_check import LanguageTool, correct

        joined_text = ''.join(file)
        if natural_language == 'auto':
            natural_language = guess_language(joined_text)

        try:
            tool = LanguageTool(natural_language, motherTongue='en_US')
        except ValueError:
            # Using 'en-US' if guessed language is not supported.
            # `logging.warning` replaces the deprecated `logging.warn` alias.
            logging.warning(
                "Changing the `natural_language` setting to 'en-US' as "
                '`language_check` failed to guess a valid language.'
            )
            natural_language = 'en-US'
            tool = LanguageTool(natural_language, motherTongue='en_US')

        tool.disabled.update(languagetool_disable_rules)
        matches = tool.check(joined_text)
        for match in matches:
            if not match.replacements:
                diffs = None
            else:
                # Apply only this match so each result carries its own diff.
                replaced = correct(joined_text, [match]).splitlines(True)
                diffs = {filename:
                         Diff.from_string_arrays(file, replaced)}

            rule_id = match.ruleId
            if match.subId is not None:
                rule_id += '[{}]'.format(match.subId)

            message = match.msg + ' (' + rule_id + ')'
            # Match coordinates are shifted by one for the source range.
            source_range = SourceRange.from_values(filename,
                                                   match.fromy+1,
                                                   match.fromx+1,
                                                   match.toy+1,
                                                   match.tox+1)
            yield Result(self, message, diffs=diffs,
                         affected_code=(source_range,))
def main():
    """
    Count word frequencies over the first ten entries of the data directory
    and plot them in descending order.

    For each of the first ten names returned by ``os.listdir`` for
    ``DATA_DIR_STR``, the file is opened, every line is printed, cleaned with
    ``clean_text`` and folded into a running count via ``count_words``. The
    resulting ``(word, count)`` pairs are printed sorted by descending count
    and plotted as rank (starting at 1) against count.
    """
    # NOTE(review): this instance is not used below (no grammar check is
    # performed here). It is still constructed so the function's side
    # effects stay as they were; drop it if that is not needed.
    lt = LanguageTool('en-US')

    data_dir = os.fsencode(DATA_DIR_STR)
    count = {}

    # Paths stay bytes throughout because the directory name was fs-encoded.
    for entry in os.listdir(data_dir)[:10]:
        path = os.path.join(data_dir, entry)
        print(path)
        with open(path) as f:
            for line in f:
                print(line)
                count = count_words(clean_text(line), count)

    words = sorted(count.items(), key=lambda pair: pair[1], reverse=True)
    for item in words:
        print(item)

    x = list(range(1, len(words) + 1))
    y = [freq for _, freq in words]
    plt.plot(x, y)
    plt.show()
Exemple #9
0
def get_language_tool_results(file_contents, locale):
    """
    Yield one finding per LanguageTool match in the given file contents.

    :param file_contents: Sequence of strings; joined for the check and used
                          as the "before" side of each diff.
    :param locale:        Locale passed to ``LanguageTool``.
    :return:              Generator of ``(message, diff, line)`` tuples, where
                          ``diff`` is ``None`` for matches without
                          replacements and ``line`` is ``match.fromy + 1``.
    """
    checker = LanguageTool(locale)
    text = "".join(file_contents)
    for match in checker.check(text):
        diff = None
        if match.replacements:
            # Apply only this match and diff the result against the input.
            corrected_lines = correct(text, [match]).splitlines(True)
            diff = Diff.from_string_arrays(file_contents, corrected_lines)

        if match.subId is None:
            rule_id = match.ruleId
        else:
            rule_id = match.ruleId + '[{}]'.format(match.subId)

        # The column hint goes through the translation function `_`.
        column_note = _('Found at column {col}.').format(col=match.fromx+1)
        message = match.msg + ' (' + rule_id + ', ' + column_note + ')'
        yield message, diff, match.fromy+1
Exemple #10
0
    def run(self,
            filename,
            file,
            language: str='auto',
            languagetool_disable_rules: typed_list(str)=()):
        '''
        Checks the code with LanguageTool.

        :param language:                   A locale representing the language
                                           you want to have checked. If set to
                                           'auto' the language is guessed.
                                           If the language cannot be guessed,
                                           'en-US' is used.
        :param languagetool_disable_rules: List of rules to disable checks for.
        '''
        text = "".join(file)
        if language == 'auto':
            language = guess_language(text)
        if not language:
            # Nothing guessed (or an empty setting): fall back to en-US.
            language = 'en-US'

        checker = LanguageTool(language, motherTongue="en_US")
        checker.disabled.update(languagetool_disable_rules)

        for match in checker.check(text):
            diffs = None
            if match.replacements:
                # Apply only this match so the diff is specific to it.
                corrected_lines = correct(text, [match]).splitlines(True)
                diffs = {
                    filename: Diff.from_string_arrays(file, corrected_lines),
                }

            if match.subId is None:
                rule_id = match.ruleId
            else:
                rule_id = match.ruleId + '[{}]'.format(match.subId)

            message = match.msg + ' (' + rule_id + ')'
            # Match coordinates are shifted by one for the source range.
            source_range = SourceRange.from_values(
                filename,
                match.fromy+1,
                match.fromx+1,
                match.toy+1,
                match.tox+1)
            yield Result(self, message, diffs=diffs,
                         affected_code=(source_range,))
    def run(self,
            filename,
            file,
            language: str='auto',
            languagetool_disable_rules: typed_list(str)=()):
        '''
        Checks the code with LanguageTool.

        :param language:                   A locale representing the language
                                           you want to have checked. If set to
                                           'auto' the language is guessed.
                                           If the language cannot be guessed,
                                           'en-US' is used.
        :param languagetool_disable_rules: List of rules to disable checks for.
        '''
        whole_text = "".join(file)
        if language == 'auto':
            language = guess_language(whole_text)
        # An empty guess or setting falls back to en-US.
        language = language or 'en-US'

        language_tool = LanguageTool(language, motherTongue="en_US")
        language_tool.disabled.update(languagetool_disable_rules)

        for match in language_tool.check(whole_text):
            if match.replacements:
                # Correct this single match and diff against the input file.
                fixed_lines = correct(whole_text, [match]).splitlines(True)
                diffs = {filename: Diff.from_string_arrays(file, fixed_lines)}
            else:
                diffs = None

            rule_id = match.ruleId
            if match.subId is not None:
                rule_id = rule_id + '[{}]'.format(match.subId)

            message = match.msg + ' (' + rule_id + ')'
            # Match coordinates are shifted by one for the source range.
            location = SourceRange.from_values(filename,
                                               match.fromy+1,
                                               match.fromx+1,
                                               match.toy+1,
                                               match.tox+1)
            yield Result(self, message, diffs=diffs,
                         affected_code=(location,))
Exemple #12
0
from language_check import LanguageTool as lc
import urllib.request as ul

# Invoke LanguageTool's private server start-up, then send a small
# form-encoded request with a 300 second timeout to 127.0.0.1:8081
# (presumably the address that server listens on -- TODO confirm).
lc._start_server()
ul.urlopen('http://127.0.0.1:8081',
           data="text=asd%0A&language=en-GB".encode("UTF-8"),
           timeout=300)
def make_dict(work_file="D:\\text"):
    """
    Gather word statistics for a Russian text file and dump word frequencies.

    The file is read, checked with LanguageTool (``ru-RU``), and split into
    Cyrillic words and ``!``/``?`` marks. Every word is analysed with
    pymorphy2 and spell-checked with enchant. The frequency table (words and
    marks) is written to ``work_file + '_dict.csv'`` as ``normal_form;count``
    rows, sorted by descending count.

    :param work_file: Path of the text file to analyse. Defaults to the
                      location that used to be hard-coded.
    :return: Tuple ``(orf_err, total_words, tonal, stop_word, tonal_words,
             match_count)``: misspelled words, total words, number of
             ``!``/``?`` marks, stop words (interjections, particles,
             conjunctions, predicatives), comparatives, and the number of
             LanguageTool matches.
    """
    morph = pymorphy2.MorphAnalyzer()
    tool = LanguageTool("ru-RU")

    if os.path.isfile(work_file):
        print('Рабочий файл: ' + work_file)
    # Read the whole file; a missing file still raises from open().
    with open(work_file, 'r') as source:
        txt = source.read()

    # str.strip returns a new string, so the result has to be kept.
    txt = txt.strip("\n")
    matches = tool.check(txt)

    # Pick out the words and the "!"/"?" marks with regular expressions.
    words = re.compile("([а-яА-Я-']+)").findall(txt)
    marks = re.compile("([!?])").findall(txt)
    spell_checker = enchant.Dict("ru_RU")

    # INTJ - interjections, PRCL - particles, CONJ - conjunctions,
    # PRED - predicatives.
    stop_pos = ("INTJ", "PRCL", "CONJ", "PRED")

    # Frequency table: key is the word (or mark), value is its count.
    # Words are deliberately not lower-cased, so proper names keep their
    # capitalisation in the table.
    lsWord = {}
    orf_err = stop_word = tonal_words = 0
    for word in words:
        parsed = morph.parse(word)[0]
        pos = parsed.tag.POS
        if pos in stop_pos:
            stop_word += 1
        # COMP - comparatives ("better", "worse", ...).
        if pos == "COMP":
            tonal_words += 1
        # Spell-check the normal form taken from the parse; the raw regex
        # match is a plain str and has no `normal_form` attribute.
        # NOTE(review): normal forms may differ in case from the original
        # word -- confirm this is the form that should be spell-checked.
        if not spell_checker.check(parsed.normal_form):
            orf_err += 1
        lsWord[word] = lsWord.get(word, 0) + 1
    total_words = len(words)

    # Count the "?" and "!" marks in the same table.
    for mark in marks:
        lsWord[mark] = lsWord.get(mark, 0) + 1
    tonal = len(marks)

    # Keys ordered by descending count.
    sorted_keys = sorted(lsWord, key=lambda k: lsWord[k], reverse=True)
    result_path = work_file + '_dict.csv'
    with open(result_path, 'w') as out:
        for key in sorted_keys:
            # Rows carry the normal form of the key, not the key itself.
            normal_form = morph.parse(key)[0].normal_form
            # NOTE(review): rows end with "\r" only, as before -- confirm
            # that this line terminator is intended.
            out.write("{0};{1}\r".format(normal_form, lsWord[key]))
        print('Результат записан: ' + result_path)

    return orf_err, total_words, tonal, stop_word, tonal_words, len(matches)