def main():
    """Read text from stdin (or the first argument) and print every
    grammar and spelling error found by grammalecte, one per line."""
    # Load grammalecte.
    gce.load()
    dictionary = gce.getDictionary()
    tokenizer = tkz.Tokenizer("fr")

    # Read input from stdin or first arg.
    source_lines = list(fileinput.input())
    text, lineset = txt.createParagraphWithLines(list(enumerate(source_lines)))

    # Grammar errors
    gramm_err = gce.parse(text, "FR", bDebug=False, bContext=True)

    # Spelling errors: every WORD token the dictionary does not recognize.
    spell_err = [
        token
        for token in tokenizer.genTokens(text)
        if token['sType'] == "WORD"
        and not dictionary.isValidToken(token['sValue'])
    ]

    # Get colums and lines.
    gramm_err, spell_err = txt.convertToXY(gramm_err, spell_err, lineset)

    # Output
    for err in gramm_err:
        print('grammaire|{}|{}|{}\n'.format(
            err['nStartY'] + 1, err['nStartX'] + 1, err['sMessage']))
    for err in spell_err:
        print('orthographe|{}|{}|{}\n'.format(
            err['nStartY'] + 1, err['nStartX'] + 1,
            'Mot absent du dictionnaire'))
def __call_grammalecte(self) -> List[GrammalecteError]:
    """
    Prepare and call Grammalecte.

    :return: The list (may be empty) of errors.
    """
    config = self._requester.get_config()
    if config is None:
        raise _IgnoredException

    # Configure the checker: analyze options plus the rules to ignore.
    checker = GrammarChecker("fr")
    checker.getGCEngine().setOptions(
        config.get_value(GrammalecteConfig.ANALYZE_OPTIONS))
    for rule in config.get_all_values(GrammalecteConfig.IGNORED_RULES):
        checker.gce.ignoreRule(rule)

    # Analyze the text paragraph by paragraph, converting raw offsets to
    # (x, y) positions before collecting the errors.
    found_errors: List[GrammalecteError] = []
    paragraphs = _SingleAnalyzer.__createParagraphs(
        self._requester.get_text(),
        config.get_value(GrammalecteConfig.CONCAT_LINES))
    for paragraph, line_definition in paragraphs:
        grammar_errors, spelling_errors = checker.getParagraphErrors(
            paragraph, bContext=True, bSpellSugg=True)
        grammar_errors, spelling_errors = Text.convertToXY(
            grammar_errors, spelling_errors, line_definition)
        found_errors.extend(GrammalecteError.buildErrorList(grammar_errors))
        found_errors.extend(GrammalecteError.buildErrorList(spelling_errors))
    return found_errors
def find_errors(input_file, opts=None):
    """Read the file and run grammalecte on it.

    :param input_file: path of the text file to check.
    :param opts: optional dict of options: "border" (pattern marking where
        the document starts), "filters" (regex patterns whose matches are
        redacted before checking), "no_gramm"/"no_spell" to disable a pass,
        and "no_apos"/"no_nbsp"/"no_esp" to disable individual rules.
    :return: list of error tuples sorted by line then column.
    """
    # Avoid the shared-mutable-default pitfall of ``opts={}``.
    opts = {} if opts is None else opts

    with open(input_file, "r") as f:
        lines = f.readlines()

    border = opts.get("border")
    if not border:
        # No borders, simply join text lines
        document_offset = 0
        raw_text = "".join(lines)
        debug("No border to detect")
    else:
        debug(str(border))  # May be None
        document_offset, raw_text = _compute_offset(lines, border)
        debug("Border found at {}".format(document_offset))

    # Cleanup text by redacting all matching patterns.
    for pattern in opts.get("filters", []):
        raw_text = _redact_text(re.compile(pattern), raw_text)
    debug(raw_text)

    text_input = raw_text.splitlines()
    text, lineset = txt.createParagraphWithLines(list(enumerate(text_input)))

    do_gramm = not opts.get("no_gramm", False)
    do_spell = not opts.get("no_spell", False)
    # Two distinct lists (the original aliased one list to both names).
    gramm_err = []
    spell_err = []

    # Load grammalecte.
    gc = grammalecte.GrammarChecker("fr")

    # Compute grammar and spell check errors
    if do_gramm:
        gc.gce.setOption("apos", not opts.get("no_apos", False))
        gc.gce.setOption("nbsp", not opts.get("no_nbsp", False))
        gc.gce.setOption("esp", not opts.get("no_esp", False))
        # NOTE(review): "tab" is deliberately tied to the "no_esp" flag here
        # and in main() — confirm this is intended, not a copy-paste slip.
        gc.gce.setOption("tab", not opts.get("no_esp", False))
        gramm_err = gc.gce.parse(text, "FR", bDebug=False)
    if do_spell:
        spell_err = gc.oSpellChecker.parseParagraph(text, True)

    # Get colums and lines.
    gramm_err, spell_err = txt.convertToXY(gramm_err, spell_err, lineset)

    if do_gramm:
        final_errors = _prepare_gramm_errors(gramm_err, document_offset,
                                             text_input)
    else:
        final_errors = []
    if do_spell:
        final_errors += _prepare_spell_errors(spell_err, document_offset)

    # Sort by line number then column (tuple fields 2 and 4).
    return sorted(final_errors, key=itemgetter(2, 4))
def generateJSON(iIndex, sText, oTokenizer, oDict, bContext=False,
                 bDebug=False, bEmptyIfNoErrors=False, lLineSet=None,
                 bReturnText=False):
    """Run the checks on *sText* and serialize the errors as a JSON string.

    Returns "" when *bEmptyIfNoErrors* is set and nothing was found.
    With *lLineSet*, errors are converted to line/column positions and the
    paragraph index is omitted; *bReturnText* additionally embeds the
    analyzed text in the payload.
    """
    aGrammErrs, aSpellErrs = _getErrors(sText, oTokenizer, oDict, bContext,
                                        bDebug)
    if bEmptyIfNoErrors and not aGrammErrs and not aSpellErrs:
        return ""

    if lLineSet:
        aGrammErrs, aSpellErrs = txt.convertToXY(aGrammErrs, aSpellErrs,
                                                 lLineSet)
        dResult = {
            "lGrammarErrors": aGrammErrs,
            "lSpellingErrors": aSpellErrs
        }
    elif bReturnText:
        dResult = {
            "iParagraph": iIndex,
            "sText": sText,
            "lGrammarErrors": aGrammErrs,
            "lSpellingErrors": aSpellErrs
        }
    else:
        dResult = {
            "iParagraph": iIndex,
            "lGrammarErrors": aGrammErrs,
            "lSpellingErrors": aSpellErrs
        }
    return json.dumps(dResult, ensure_ascii=False)
def main(files, opts=None):
    """Read the file and run grammalecte on it.

    Prints one line per error found, in the form
    ``grammaire|line|col|message`` or ``orthographe|line|col|message``,
    skipping warnings that only stem from Org-mode keyword lines.

    :param files: file names handed to ``fileinput.input`` (empty -> stdin).
    :param opts: optional dict; "no_gramm"/"no_spell" disable a pass and
        "no_apos"/"no_nbsp"/"no_esp" disable individual grammar rules.
    """
    # Avoid the shared-mutable-default pitfall of ``opts={}``.
    opts = {} if opts is None else opts

    # Read input from stdin or first arg.
    text_input = [line for line in fileinput.input(files=files)]
    text, lineset = txt.createParagraphWithLines(list(enumerate(text_input)))

    do_gramm = opts.get("no_gramm", False) is False
    do_spell = opts.get("no_spell", False) is False
    gramm_err = []
    spell_err = []

    # Load grammalecte.
    gc = grammalecte.GrammarChecker("fr")

    # Compute grammar and spell check errors
    if do_gramm:
        gc.gce.setOption("apos", opts.get("no_apos", False) is False)
        gc.gce.setOption("nbsp", opts.get("no_nbsp", False) is False)
        gc.gce.setOption("esp", opts.get("no_esp", False) is False)
        # NOTE(review): "tab" is tied to the "no_esp" flag, as in
        # find_errors() — confirm this is intended.
        gc.gce.setOption("tab", opts.get("no_esp", False) is False)
        gramm_err = gc.gce.parse(text, "FR", bDebug=False)
    if do_spell:
        spell_err = gc.oSpellChecker.parseParagraph(text, False)

    # Get colums and lines.
    gramm_err, spell_err = txt.convertToXY(gramm_err, spell_err, lineset)

    org_keywords = [
        "author", "caption", "category", "creator", "date", "email",
        "header", "keywords", "language", "name", "options", "title",
        "attr_.+"
    ]

    # Output
    if do_gramm:
        org_re = re.compile("^#\\+(?:{})\\:$".format("|".join(org_keywords)),
                            re.IGNORECASE)
        for i in gramm_err:
            cur_line = text_input[i["nStartY"]]
            if i["sType"] == "esp":
                # Remove useless space warning for visual paragraph in
                # text modes
                next_line_no = i["nStartY"] + 1
                # BUGFIX: was ``>``; ``next_line_no == len(text_input)``
                # would index one past the end and raise IndexError when
                # the warning sits on the last line of the input.
                if next_line_no >= len(text_input):
                    next_line = ""
                else:
                    next_line = text_input[next_line_no].strip()
                if cur_line[i["nStartX"]] == "\n" and next_line == "":
                    continue
            elif i["sType"] == "nbsp":
                # Remove some unwanted nbsp warnings
                if cur_line[0:4] == "#-*-":
                    continue
                # The following line is not subject to overflow
                # excepton, even if i["nStartX"] + 1 > len(cur_line)
                m = org_re.match(cur_line[0:i["nStartX"] + 1])
                if m is not None and m.start() == 0:
                    continue
            print("grammaire|{}|{}|{}\n".format(i["nStartY"] + 1,
                                                i["nStartX"] + 1,
                                                i["sMessage"]))
    if do_spell:
        # Hoisted out of the loop: the pattern never changes per error.
        org_re = re.compile("(?:{})\\:".format("|".join(org_keywords)),
                            re.IGNORECASE)
        for i in spell_err:
            cur_line = text_input[i["nStartY"]]
            # Skip words that are part of an Org-mode keyword line.
            m = org_re.match(cur_line, i["nStartX"])
            if m is not None and m.start() == i["nStartX"]:
                continue
            print("orthographe|{}|{}|{}\n".format(
                i["nStartY"] + 1, i["nStartX"] + 1,
                "Mot absent du dictionnaire"))