def get_language_tool_results(filename, file_contents, locale):
    """Yield one (message, diffs, source_range) triple per LanguageTool match.

    ``diffs`` is ``None`` when LanguageTool offers no replacement for a
    match; otherwise it maps *filename* to a Diff applying the correction.
    """
    text = "".join(file_contents)
    if locale == 'auto':
        locale = guess_language(text)
    if not locale:
        locale = 'en-US'
    checker = LanguageTool(locale)
    for match in checker.check(text):
        diffs = None
        if match.replacements:
            fixed_lines = correct(text, [match]).splitlines(True)
            diffs = {filename: Diff.from_string_arrays(file_contents,
                                                       fixed_lines)}
        rule = match.ruleId
        if match.subId is not None:
            rule = '{}[{}]'.format(rule, match.subId)
        yield (match.msg + ' (' + rule + ')',
               diffs,
               SourceRange.from_values(filename,
                                       match.fromy + 1, match.fromx + 1,
                                       match.toy + 1, match.tox + 1))
def get_language_tool_results(filename, file_contents, locale):
    """Run LanguageTool over *file_contents* and yield one finding per match."""
    content = "".join(file_contents)
    lang = guess_language(content) if locale == 'auto' else locale
    lang = lang or 'en-US'
    tool = LanguageTool(lang)
    for issue in tool.check(content):
        if issue.replacements:
            corrected = correct(content, [issue]).splitlines(True)
            patch = {
                filename: Diff.from_string_arrays(file_contents, corrected)
            }
        else:
            # No suggested replacement -> nothing to diff.
            patch = None
        identifier = issue.ruleId
        if issue.subId is not None:
            identifier += '[{}]'.format(issue.subId)
        position = SourceRange.from_values(filename,
                                           issue.fromy + 1, issue.fromx + 1,
                                           issue.toy + 1, issue.tox + 1)
        yield issue.msg + ' (' + identifier + ')', patch, position
def process_documentation(self, parsed, locale, languagetool_disable_rules):
    """
    Spell-check and grammar-correct a parsed documentation comment via
    LanguageTool.

    :param parsed:
        Contains parsed documentation comment.
    :param locale:
        A locale representing the language you want to have checked.
        Default is set to 'en-US'.
    :param languagetool_disable_rules:
        List of rules to disable checks for.
    :return:
        A tuple of fixed parsed documentation comment and warning_desc.
    """
    # Defer import so the check_prerequisites can be run without
    # language_check being there.
    from language_check import LanguageTool, correct

    checker = LanguageTool(locale)
    checker.disabled.update(languagetool_disable_rules)
    corrected = [
        entry._replace(desc=correct(entry.desc, checker.check(entry.desc)))
        for entry in parsed
    ]
    return (corrected, 'Documentation has invalid Grammar/Spelling')
def run(self, filename, file,
        natural_language: str = 'auto',
        languagetool_disable_rules: typed_list(str) = (),
        ):
    """
    Checks the code with LanguageTool.

    :param natural_language:
        A locale representing the language you want to have checked.
        If set to 'auto' the language is guessed.
        If the language cannot be guessed or an unsupported language is
        guessed, 'en-US' is used.
    :param languagetool_disable_rules:
        List of rules to disable checks for.
    """
    # Defer import so the check_prerequisites can be run without
    # language_check being there.
    from language_check import LanguageTool, correct

    joined_text = ''.join(file)
    natural_language = (guess_language(joined_text)
                        if natural_language == 'auto'
                        else natural_language)

    try:
        tool = LanguageTool(natural_language, motherTongue='en_US')
    except ValueError:
        # Using 'en-US' if guessed language is not supported.
        # FIX: logging.warn() is deprecated (since Python 3.3);
        # logging.warning() is the supported spelling.
        logging.warning(
            "Changing the `natural_language` setting to 'en-US' as "
            '`language_check` failed to guess a valid language.'
        )
        natural_language = 'en-US'
        tool = LanguageTool(natural_language, motherTongue='en_US')

    tool.disabled.update(languagetool_disable_rules)
    for match in tool.check(joined_text):
        if not match.replacements:
            diffs = None
        else:
            replaced = correct(joined_text, [match]).splitlines(True)
            diffs = {filename: Diff.from_string_arrays(file, replaced)}

        rule_id = match.ruleId
        if match.subId is not None:
            rule_id += '[{}]'.format(match.subId)

        message = match.msg + ' (' + rule_id + ')'
        # LanguageTool positions are 0-based; SourceRange is 1-based.
        source_range = SourceRange.from_values(filename,
                                               match.fromy + 1,
                                               match.fromx + 1,
                                               match.toy + 1,
                                               match.tox + 1)
        yield Result(self, message, diffs=diffs,
                     affected_code=(source_range,))
def __commas(self, obj):
    """Score grammar issues in the stored text relative to its word count.

    Stores the score in ``self.__value``: the issues-per-word ratio,
    doubled, saturating at 1.0 once the ratio reaches 0.5.
    """
    issue_count = len(LanguageTool("ru-RU").check(self.__txt))
    ratio = issue_count / obj.get_totalword()
    # Saturate at 1.0; below the threshold, double the ratio to spread
    # the scale over [0, 1).
    self.__value = 1.0 if ratio >= 0.5 else ratio * 2
def run(self, filename, file, natural_language: str='auto',
        languagetool_disable_rules: typed_list(str)=()):
    '''
    Checks the code with LanguageTool.

    :param natural_language:
        A locale representing the language you want to have checked.
        If set to 'auto' the language is guessed.
        If the language cannot be guessed or an unsupported language is
        guessed, 'en-US' is used.
    :param languagetool_disable_rules:
        List of rules to disable checks for.
    '''
    # Defer import so the check_prerequisites can be run without
    # language_check being there.
    from language_check import LanguageTool, correct

    joined_text = ''.join(file)
    natural_language = (guess_language(joined_text)
                        if natural_language == 'auto'
                        else natural_language)

    try:
        tool = LanguageTool(natural_language, motherTongue='en_US')
    except ValueError:
        # Using 'en-US' if guessed language is not supported.
        # FIX: logging.warn() is deprecated (since Python 3.3);
        # logging.warning() is the supported spelling.
        logging.warning(
            "Changing the `natural_language` setting to 'en-US' as "
            '`language_check` failed to guess a valid language.'
        )
        natural_language = 'en-US'
        tool = LanguageTool(natural_language, motherTongue='en_US')

    tool.disabled.update(languagetool_disable_rules)
    for match in tool.check(joined_text):
        if not match.replacements:
            diffs = None
        else:
            replaced = correct(joined_text, [match]).splitlines(True)
            diffs = {filename: Diff.from_string_arrays(file, replaced)}

        rule_id = match.ruleId
        if match.subId is not None:
            rule_id += '[{}]'.format(match.subId)

        message = match.msg + ' (' + rule_id + ')'
        # LanguageTool positions are 0-based; SourceRange is 1-based.
        source_range = SourceRange.from_values(filename,
                                               match.fromy + 1,
                                               match.fromx + 1,
                                               match.toy + 1,
                                               match.tox + 1)
        yield Result(self, message, diffs=diffs,
                     affected_code=(source_range,))
def main(max_files=10):
    """Count word frequencies across data files and plot the rank/frequency
    curve.

    :param max_files: Maximum number of files from ``DATA_DIR_STR`` to
        process (generalizes the previously hard-coded ``[:10]`` slice;
        default keeps the original behavior).
    """
    # NOTE(review): removed the unused LanguageTool('en-US') instance —
    # it was only referenced from commented-out code but started a
    # LanguageTool server as a side effect.
    data_dir = os.fsencode(DATA_DIR_STR)  # avoid shadowing builtin `dir`
    counts = {}
    for entry in os.listdir(data_dir)[:max_files]:
        path = os.path.join(data_dir, entry)
        print(path)
        with open(path) as f:
            # Iterate the file lazily instead of materializing readlines().
            for line in f:
                print(line)
                counts = count_words(clean_text(line), counts)

    ranked = sorted(counts.items(), key=lambda pair: pair[1], reverse=True)
    for word_and_count in ranked:
        print(word_and_count)

    # Plot frequency against rank (1-based).
    ranks = list(range(1, len(ranked) + 1))
    frequencies = [pair[1] for pair in ranked]
    plt.plot(ranks, frequencies)
    plt.show()
def get_language_tool_results(file_contents, locale):
    """Yield (message, diff, line_number) for every LanguageTool match.

    ``diff`` is ``None`` when LanguageTool has no replacement to suggest.
    """
    text = "".join(file_contents)
    checker = LanguageTool(locale)
    for finding in checker.check(text):
        if finding.replacements:
            corrected = correct(text, [finding]).splitlines(True)
            diff = Diff.from_string_arrays(file_contents, corrected)
        else:
            diff = None
        rule = finding.ruleId
        if finding.subId is not None:
            rule = rule + '[{}]'.format(finding.subId)
        column_note = _('Found at column {col}.').format(col=finding.fromx + 1)
        yield (finding.msg + ' (' + rule + ', ' + column_note + ')',
               diff,
               finding.fromy + 1)
def run(self, filename, file, language: str='auto',
        languagetool_disable_rules: typed_list(str)=()):
    '''
    Checks the code with LanguageTool.

    :param language:
        A locale representing the language you want to have checked.
        If set to 'auto' the language is guessed.
        If the language cannot be guessed, 'en-US' is used.
    :param languagetool_disable_rules:
        List of rules to disable checks for.
    '''
    text = "".join(file)
    if language == 'auto':
        language = guess_language(text)
    if not language:
        language = 'en-US'

    checker = LanguageTool(language, motherTongue="en_US")
    checker.disabled.update(languagetool_disable_rules)

    for match in checker.check(text):
        diffs = None
        if match.replacements:
            fixed = correct(text, [match]).splitlines(True)
            diffs = {filename: Diff.from_string_arrays(file, fixed)}

        rule = match.ruleId
        if match.subId is not None:
            rule += '[{}]'.format(match.subId)

        where = SourceRange.from_values(filename,
                                        match.fromy + 1, match.fromx + 1,
                                        match.toy + 1, match.tox + 1)
        yield Result(self, match.msg + ' (' + rule + ')',
                     diffs=diffs, affected_code=(where,))
# Manually start the LanguageTool HTTP server and query it directly.
from language_check import LanguageTool as lc
import urllib.request as ul

# NOTE(review): _start_server is a private language_check API and may
# change between versions — confirm against the installed release.
lc._start_server()
# POST the form-encoded check request to the local server; the third
# positional argument to urlopen is the timeout in seconds.
# assumes the server listens on 127.0.0.1:8081 — TODO confirm the port
# the installed language_check version uses.
ul.urlopen('http://127.0.0.1:8081', bytes("text=asd%0A&language=en-GB", "UTF-8"), 300)
def make_dict():
    """Analyse the Russian text in the hard-coded work file and write a
    word-frequency CSV next to it.

    Counts total words, spelling errors (pyenchant), stop words
    (interjections, particles, conjunctions, predicatives) and "tonal"
    markers (comparatives plus '!'/'?' characters), runs LanguageTool
    grammar checking over the whole text, and writes
    ``<work_file>_dict.csv`` with one ``lemma;count`` row per word,
    sorted by descending frequency.

    :return: Tuple ``(orf_err, total_words, tonal, stop_word,
        tonal_words, grammar_match_count)``.
    :raises FileNotFoundError: If the work file does not exist.
    """
    morph = pymorphy2.MorphAnalyzer()
    tool = LanguageTool("ru-RU")
    orf_err, total_words, tonal, stop_word, tonal_words = 0, 0, 0, 0, 0

    work_file = "D:\\text"
    if not os.path.isfile(work_file):
        # FIX: a missing file previously fell through to a NameError on
        # `txt`; fail explicitly instead.
        raise FileNotFoundError(work_file)
    print('Рабочий файл: ' + work_file)

    with open(work_file, 'r') as f:
        txt = f.read()
    # FIX: str.strip returns a new string; the original discarded the
    # result, making the call a no-op.
    txt = txt.strip("\n")

    matches = tool.check(txt)

    # Extract words (Cyrillic runs) and tonal punctuation with regexes.
    word_re, punct_re = re.compile("([а-яА-Я-']+)"), re.compile("([!?])")
    res, res1 = word_re.findall(txt), punct_re.findall(txt)

    speller = enchant.Dict("ru_RU")
    # word -> occurrence count
    ls_word = {}

    for key in res:
        parsed = morph.parse(key)[0]
        # Stop words: INTJ - interjections, PRCL - particles,
        # CONJ - conjunctions, PRED - predicatives.
        if parsed.tag.POS in ("INTJ", "PRCL", "CONJ", "PRED"):
            stop_word += 1
        # Tonal words: COMP - comparatives ("лучше", "хуже", ...;
        # note the original warned that "выше" is not covered).
        if parsed.tag.POS == "COMP":
            tonal_words += 1
        # Spelling check against the lemma.  FIX: the original called
        # ``key.normal_form`` on a plain str, which raises
        # AttributeError; the pymorphy2 parse carries the intended lemma.
        if not speller.check(parsed.normal_form):
            orf_err += 1
        ls_word[key] = ls_word.get(key, 0) + 1
        total_words += 1

    # Count '!' and '?' occurrences as tonal markers.
    for key in res1:
        key = key.lower()
        ls_word[key] = ls_word.get(key, 0) + 1
        tonal += 1

    # Keys sorted by descending frequency.
    sorted_keys = sorted(ls_word, key=lambda w: ls_word[w], reverse=True)

    with open(work_file + '_dict.csv', 'w') as out:
        for key in sorted_keys:
            # Reduce each word to its normal form (lemma/infinitive).
            lemma = morph.parse(key)[0].normal_form
            out.write("{0};{1}\r".format(lemma, ls_word[key]))
        print('Результат записан: ' + work_file + '_dict.csv')

    return orf_err, total_words, tonal, stop_word, tonal_words, len(matches)