def main ():
    """Command-line entry point: check the text currently in the clipboard.

    Builds the argument parser, initializes the Grammalecte engine, reads the
    text from the system clipboard, optionally auto-formats it, runs the
    grammar checker, and writes the result back into the clipboard.
    """
    xParser = argparse.ArgumentParser()
    xParser.add_argument("-f", "--file", help="parse file (UTF-8 required!) [on Windows, -f is similar to -ff]", type=str)
    xParser.add_argument("-ff", "--file_to_file", help="parse file (UTF-8 required!) and create a result file (*.res.txt)", type=str)
    xParser.add_argument("-j", "--json", help="generate list of errors in JSON", action="store_true")
    xParser.add_argument("-w", "--width", help="width in characters (40 < width < 200; default: 100)", type=int, choices=range(40,201,10), default=100)
    xParser.add_argument("-tf", "--textformatter", help="auto-format text according to typographical rules", action="store_true")
    xParser.add_argument("-tfo", "--textformatteronly", help="auto-format text and disable grammar checking (only with option 'file' or 'file_to_file')", action="store_true")
    xArgs = xParser.parse_args()
    # Initialize the grammar-checker engine and its companion objects.
    gce.load()
    gce.setOptions({"html": True})
    oDict = gce.getDictionary()
    oTokenizer = tkz.Tokenizer("fr")
    oLexGraphe = lxg.Lexicographe(oDict)
    if xArgs.textformatter or xArgs.textformatteronly:
        oTF = tf.TextFormatter()
    # The text to check is taken from the system clipboard.
    sText = clipboard.get()
    bDebug = False
    for sParagraph in txt.getParagraph(sText):
        # NOTE(review): the loop variable sParagraph is never used — the whole
        # sText is formatted and checked on every iteration, so each pass
        # overwrites the previous clipboard result; confirm this is intended.
        if xArgs.textformatter:
            sText = oTF.formatText(sText)
        sRes = generateText(0, sText, oTokenizer, oDict, xArgs.json, nWidth=xArgs.width, bDebug=bDebug, bEmptyIfNoErrors=True)
        if sRes:
            clipboard.set(sRes)
        else:
            clipboard.set("No errors found.")
    # NOTE(review): sRes is unbound if getParagraph() yields nothing — verify.
    print(sRes)
def main():
    '''Read text from stdin (or the files given as arguments) and run
    grammalecte on it.

    Prints one line per detected error, in the form
    ``type|line|column|message`` — grammar errors first, then spelling
    errors (words absent from the dictionary). Positions are 1-based.
    '''
    # Load grammalecte.
    gce.load()
    dictionary = gce.getDictionary()
    tokenizer = tkz.Tokenizer("fr")
    # Read input from stdin or first arg (list() instead of a copy-comprehension).
    text_input = list(fileinput.input())
    text, lineset = txt.createParagraphWithLines(list(enumerate(text_input)))
    # Grammar errors (bContext=True gives richer messages).
    gramm_err = gce.parse(text, "FR", bDebug=False, bContext=True)
    # Spelling errors: word tokens the dictionary does not recognize.
    spell_err = [
        token for token in tokenizer.genTokens(text)
        if token['sType'] == "WORD"
        and not dictionary.isValidToken(token['sValue'])
    ]
    # Get columns and lines from flat offsets.
    gramm_err, spell_err = txt.convertToXY(gramm_err, spell_err, lineset)
    # Output — no need to materialize the iterables before looping.
    for err in gramm_err:
        print('grammaire|{}|{}|{}\n'.format(err['nStartY'] + 1, err['nStartX'] + 1,
                                            err['sMessage']))
    for err in spell_err:
        print('orthographe|{}|{}|{}\n'.format(err['nStartY'] + 1, err['nStartX'] + 1,
                                              'Mot absent du dictionnaire'))
def main ():
    """Interactive / lint-style command-line entry point.

    Reads text from a ``> `` prompt in a loop. With --parse, shows the
    morphological analysis of each word; otherwise checks the text with
    Grammalecte. Exits with status 1 when at least one error was found
    (useful with --validate as a batch check).
    """
    xParser = argparse.ArgumentParser()
    xParser.add_argument("-d", "--debug", help="display text transformation and disambiguation", action="store_true")
    xParser.add_argument("-p", "--parse", help="parse and display sentence structure", action="store_true")
    xParser.add_argument("-v", "--validate", help="validate text only", action="store_true")
    xParser.add_argument("-a", "--autocorrect", help="try to correct automatically", action="store_true")
    xParser.add_argument("-i", "--ignore-rule", help="ignore this rule (can be used more than once)", action="append", default=[])
    xParser.add_argument("-tf", "--textformatter", help="auto-format text", action="store_true")
    xArgs = xParser.parse_args()
    # Engine initialization.
    gce.load()
    gce.setOptions({"html": True})
    oDict = gce.getDictionary()
    oTokenizer = tzr.Tokenizer("fr")
    oLexGraphe = lxg.Lexicographe(oDict)
    if xArgs.textformatter:
        oTF = tf.TextFormatter()
    sInputText = "> "
    sText = _getText(sInputText)
    errors = False
    while sText:
        if xArgs.parse:
            # Morphological analysis: one entry per word of the input.
            for sWord in sText.split():
                if sWord:
                    echo("* {}".format(sWord))
                    for sMorph in oDict.getMorph(sWord):
                        echo(" {:<32} {}".format(sMorph, oLexGraphe.formatTags(sMorph)))
        else:
            if xArgs.textformatter:
                # NOTE(review): assumed the echo of the formatted text belongs
                # to the formatter branch — confirm against original layout.
                sText = oTF.formatText(sText)
                sys.stdout.write(sText)
            res = parser(sText, oTokenizer, oDict, bDebug=xArgs.debug, aIgnoredRules=xArgs.ignore_rule)
            if xArgs.validate:
                # Validation mode: only record whether errors exist.
                if res:
                    errors = True
            else:
                if res:
                    showResult(sText, res, xArgs.autocorrect)
                    errors = True
                else:
                    echo("No error found")
        sText = _getText(sInputText)
    if errors:
        # Non-zero exit status so calling scripts can detect failures.
        sys.exit(1)
def perf(sVersion):
    """Run the performance corpus and append timings to the memo file.

    :param sVersion: version label written at the start of the memo line.
    """
    print("\nPerformance tests")
    gce.load()
    # Warm-up: a throwaway parse forces rule compilation before timing starts.
    aErrs = gce.parse(
        "Texte sans importance… utile pour la compilation des règles avant le calcul des perfs."
    )
    with open("./tests/fr/perf.txt", "r", encoding="utf-8") as hCorpus, \
         open("./tests/fr/perf_memo.txt", "a", encoding="utf-8") as hMemo:
        # Each memo line starts with the version label and a timestamp.
        hMemo.write("{:<12}{:<20}".format(sVersion, time.strftime("%Y.%m.%d %H:%M")))
        for sRaw in hCorpus:
            # Skip comment lines and blank lines.
            if sRaw.startswith("#"):
                continue
            sSample = sRaw.strip()
            if not sSample:
                continue
            # Time each sample; the label is the text up to the first period.
            with timeblock(sSample[:sSample.find(".")], hMemo):
                aErrs = gce.parse(sSample)
        hMemo.write("\n")
def __init__ (self, ctx, *args):
    """Set up the UNO proofreader service and initialize Grammalecte.

    Registers the service identity, builds the tuple of supported locales,
    loads the grammar-checker engine with the stored user options, and
    prepares a bounded cache for the results of big paragraphs.
    """
    self.ctx = ctx
    # UNO service identity.
    self.ServiceName = "com.sun.star.linguistic2.Proofreader"
    self.ImplementationName = "org.openoffice.comp.pyuno.Lightproof." + gce.pkg
    self.SupportedServiceNames = (self.ServiceName, )
    # Locales supported by the checker, as declared by the engine.
    self.locales = tuple(
        Locale(gce.locales[sKey][0], gce.locales[sKey][1], gce.locales[sKey][2])
        for sKey in gce.locales
    )
    xContext = uno.getComponentContext()
    # Engine initialization and user grammar-checker options.
    gce.load()
    gce.setOptions(Options.load(xContext))
    # Bounded store for the results of big paragraphs.
    self.dResult = {}
    self.nMaxRes = 1500
    self.lLastRes = deque(maxlen=self.nMaxRes)
    self.nRes = 0
def __init__(self, ctx, *args):
    """Construct the UNO proofreader component.

    :param ctx: UNO component context provided by the office application.
    :param args: extra arguments from the service factory (unused here).
    """
    self.ctx = ctx
    # Identity of the linguistic service this component implements.
    self.ServiceName = "com.sun.star.linguistic2.Proofreader"
    self.ImplementationName = "org.openoffice.comp.pyuno.Lightproof." + gce.pkg
    self.SupportedServiceNames = (self.ServiceName, )
    # Build the tuple of locales supported by the engine.
    self.locales = []
    for i in gce.locales:
        l = gce.locales[i]
        self.locales.append(Locale(l[0], l[1], l[2]))
    self.locales = tuple(self.locales)
    xCurCtx = uno.getComponentContext()
    # Engine initialization.
    gce.load()
    # Grammar-checker options restored from the user configuration.
    # opt_handler.load(xCurCtx)
    dOpt = Options.load(xCurCtx)
    gce.setOptions(dOpt)
    # Store for results of big paragraphs; the deque bounds its growth.
    self.dResult = {}
    self.nMaxRes = 1500
    self.lLastRes = deque(maxlen=self.nMaxRes)
    self.nRes = 0
def main ():
    """Command-line entry point: check a file or run a pseudo-console.

    With -f/-ff the given file is read paragraph by paragraph, checked (or
    only auto-formatted with -tfo), and the result is printed or written to
    a ``*.res.txt`` file. Without a file, an interactive prompt accepts text
    and slash-commands (/h for help, /q to quit).
    """
    xParser = argparse.ArgumentParser()
    xParser.add_argument("-f", "--file", help="parse file (UTF-8 required!) [on Windows, -f is similar to -ff]", type=str)
    xParser.add_argument("-ff", "--file_to_file", help="parse file (UTF-8 required!) and create a result file (*.res.txt)", type=str)
    xParser.add_argument("-j", "--json", help="generate list of errors in JSON", action="store_true")
    xParser.add_argument("-w", "--width", help="width in characters (40 < width < 200; default: 100)", type=int, choices=range(40,201,10), default=100)
    xParser.add_argument("-tf", "--textformatter", help="auto-format text according to typographical rules", action="store_true")
    xParser.add_argument("-tfo", "--textformatteronly", help="auto-format text and disable grammar checking (only with option 'file' or 'file_to_file')", action="store_true")
    xArgs = xParser.parse_args()
    # Engine initialization.
    gce.load()
    gce.setOptions({"html": True})
    echo("Grammalecte v{}".format(gce.version))
    oDict = gce.getDictionary()
    oTokenizer = tkz.Tokenizer("fr")
    oLexGraphe = lxg.Lexicographe(oDict)
    if xArgs.textformatter or xArgs.textformatteronly:
        oTF = tf.TextFormatter()
    sFile = xArgs.file or xArgs.file_to_file
    if sFile:
        # file processing: results go to *.res.txt when -ff is used
        # (and always on Windows, where -f behaves like -ff).
        hDst = open(sFile[:sFile.rfind(".")]+".res.txt", "w", encoding="utf-8") if xArgs.file_to_file or sys.platform == "win32" else None
        bComma = False
        if xArgs.json:
            # Opening of the JSON envelope; entries are comma-separated below.
            output('{ "grammalecte": "'+gce.version+'", "lang": "'+gce.lang+'", "data" : [\n', hDst)
        for i, sText in enumerate(readfile(sFile), 1):
            if xArgs.textformatter or xArgs.textformatteronly:
                sText = oTF.formatText(sText)
            if xArgs.textformatteronly:
                # Formatting only: no grammar check.
                output(sText, hDst)
            else:
                sText = generateText(i, sText, oTokenizer, oDict, xArgs.json, nWidth=xArgs.width)
                if sText:
                    if xArgs.json and bComma:
                        output(",\n", hDst)
                    output(sText, hDst)
                    bComma = True
            if hDst:
                # Progress indicator on the console while writing to file.
                echo("§ %d\r" % i, end="", flush=True)
        if xArgs.json:
            output("\n]}\n", hDst)
    else:
        # pseudo-console
        sInputText = "\n~==========~ Enter your text [/h /q] ~==========~\n"
        sText = _getText(sInputText)
        bDebug = False
        while True:
            if sText.startswith("?"):
                # "?word …": show the morphological analysis of each word.
                for sWord in sText[1:].strip().split():
                    if sWord:
                        echo("* {}".format(sWord))
                        for sMorph in oDict.getMorph(sWord):
                            echo(" {:<32} {}".format(sMorph, oLexGraphe.formatTags(sMorph)))
            elif sText.startswith("/+"):
                # Activate the listed grammar-checker options.
                gce.setOptions({ opt:True for opt in sText[2:].strip().split() if opt in gce.getOptions() })
            elif sText.startswith("/-"):
                # Deactivate the listed grammar-checker options.
                gce.setOptions({ opt:False for opt in sText[2:].strip().split() if opt in gce.getOptions() })
            elif sText == "/debug" or sText == "/d":
                bDebug = not(bDebug)
                echo("debug mode on" if bDebug else "debug mode off")
            elif sText == "/help" or sText == "/h":
                echo(_HELP)
            elif sText == "/lopt" or sText == "/l":
                # List the current option values.
                echo("\n".join( [ k+":\t"+str(v) for k, v in sorted(gce.getOptions().items()) ] ))
            elif sText == "/quit" or sText == "/q":
                break
            elif sText.startswith("/rl"):
                # reload (todo)
                pass
            else:
                for sParagraph in txt.getParagraph(sText):
                    # NOTE(review): sParagraph is never used — the whole sText
                    # is re-formatted and re-checked each iteration; verify.
                    if xArgs.textformatter:
                        sText = oTF.formatText(sText)
                    sRes = generateText(0, sText, oTokenizer, oDict, xArgs.json, nWidth=xArgs.width, bDebug=bDebug, bEmptyIfNoErrors=True)
                    if sRes:
                        echo("\n" + sRes)
                    else:
                        echo("\nNo error found.")
            sText = _getText(sInputText)
def main ():
    """Command-line entry point: check a file, a file-to-file run, or a prompt.

    -f prints results to the console, -ff writes them to ``*.res.txt``; on
    Windows -f is remapped to -ff. With no file, an interactive prompt
    accepts text ("?word" shows morphology; an empty line quits).
    """
    xParser = argparse.ArgumentParser()
    xParser.add_argument("-f", "--file", help="parse file (UTF-8 required!) [on Windows, -f is similar to -ff]", type=str)
    xParser.add_argument("-ff", "--file_to_file", help="parse file (UTF-8 required!) and create a result file (*.res.txt)", type=str)
    xParser.add_argument("-d", "--debug", help="display text transformation and disambiguation", action="store_true")
    xParser.add_argument("-w", "--width", help="width in characters (40 < width < 200; default: 100)", type=int, choices=range(40,201,10), default=100)
    xParser.add_argument("-tf", "--textformatter", help="auto-format text", action="store_true")
    xArgs = xParser.parse_args()
    # On Windows the console cannot display the results reliably,
    # so -f is turned into -ff (write results to a file instead).
    if sys.platform == "win32" and xArgs.file:
        xArgs.file_to_file = xArgs.file
        xArgs.file = None
    # Engine initialization.
    gce.load()
    gce.setOptions({"html": True})
    echo("Grammalecte v{}".format(gce.version))
    oDict = gce.getDictionary()
    oTokenizer = tzr.Tokenizer("fr")
    oLexGraphe = lxg.Lexicographe(oDict)
    if xArgs.textformatter:
        oTF = tf.TextFormatter()
    if xArgs.file:
        # Check a file and print the results to the console.
        if os.path.isfile(xArgs.file):
            with open(xArgs.file, "r", encoding="utf-8") as hSrc:
                for sText in hSrc:
                    if xArgs.textformatter:
                        sText = oTF.formatText(sText)
                    echo(parser(sText, oTokenizer, oDict, nWidth=xArgs.width, bDebug=xArgs.debug))
        else:
            print("# Error: file not found.")
    elif xArgs.file_to_file:
        # Check a file and write the results to <name>.res.txt.
        if os.path.isfile(xArgs.file_to_file):
            with open(xArgs.file_to_file, "r", encoding="utf-8") as hSrc, \
                 open(xArgs.file_to_file[:xArgs.file_to_file.rfind(".")]+".res.txt", "w", encoding="utf-8") as hDst:
                for i, sText in enumerate(hSrc, 1):
                    if xArgs.textformatter:
                        sText = oTF.formatText(sText)
                    hDst.write(parser(sText, oTokenizer, oDict, nWidth=xArgs.width, bDebug=xArgs.debug))
                    # Progress indicator (paragraph counter).
                    print("§ %d\r" % i, end="", flush=True)
        else:
            print("# Error: file not found.")
    else:
        # Interactive prompt; an empty line ends the loop.
        sInputText = "\n~==========~ Écrivez votre texte [Entrée pour quitter] ~==========~\n"
        sText = _getText(sInputText)
        while sText:
            if sText.startswith("?"):
                # "?word …": show the morphological analysis of each word.
                for sWord in sText[1:].split():
                    if sWord:
                        echo("* {}".format(sWord))
                        for sMorph in oDict.getMorph(sWord):
                            echo(" {:<32} {}".format(sMorph, oLexGraphe.formatTags(sMorph)))
            elif sText == "rl":
                # reload (todo)
                pass
            else:
                if xArgs.textformatter:
                    sText = oTF.formatText(sText)
                res = parser(sText, oTokenizer, oDict, nWidth=xArgs.width, bDebug=xArgs.debug, bEmptyIfNoErrors=True)
                echo("\n"+res if res else "\nNo error found.")
            sText = _getText(sInputText)
def main():
    """Full-featured command-line entry point.

    Supports file / file-to-file checking (plain text or JSON output, with
    optional concatenation of lines into paragraphs), listing of options and
    rules, option/rule toggling, and an interactive pseudo-console with
    slash-commands (/h for help, /q to quit).

    Fixes over the previous revision:
    - the "/tf" toggle echoed the state of the *debug* flag instead of the
      textformatter flag;
    - in the pseudo-console, the paragraph loop formatted and checked the
      whole input again on every iteration instead of the current paragraph.
    """
    xParser = argparse.ArgumentParser()
    xParser.add_argument("-f", "--file", help="parse file (UTF-8 required!) [on Windows, -f is similar to -ff]", type=str)
    xParser.add_argument("-ff", "--file_to_file", help="parse file (UTF-8 required!) and create a result file (*.res.txt)", type=str)
    xParser.add_argument("-owe", "--only_when_errors", help="display results only when there are errors", action="store_true")
    xParser.add_argument("-j", "--json", help="generate list of errors in JSON (only with option --file or --file_to_file)", action="store_true")
    xParser.add_argument("-cl", "--concat_lines", help="concatenate lines not separated by an empty paragraph (only with option --file or --file_to_file)", action="store_true")
    xParser.add_argument("-tf", "--textformatter", help="auto-format text according to typographical rules (unavailable with option --concat_lines)", action="store_true")
    xParser.add_argument("-tfo", "--textformatteronly", help="auto-format text and disable grammar checking (only with option --file or --file_to_file)", action="store_true")
    xParser.add_argument("-ctx", "--context", help="return errors with context (only with option --json)", action="store_true")
    xParser.add_argument("-w", "--width", help="width in characters (40 < width < 200; default: 100)", type=int, choices=range(40, 201, 10), default=100)
    xParser.add_argument("-lo", "--list_options", help="list options", action="store_true")
    xParser.add_argument("-lr", "--list_rules", nargs="?", help="list rules [regex pattern as filter]", const="*")
    xParser.add_argument("-on", "--opt_on", nargs="+", help="activate options")
    xParser.add_argument("-off", "--opt_off", nargs="+", help="deactivate options")
    xParser.add_argument("-roff", "--rule_off", nargs="+", help="deactivate rules")
    xParser.add_argument("-d", "--debug", help="debugging mode (only in interactive mode)", action="store_true")
    xArgs = xParser.parse_args()
    gce.load()
    if not xArgs.json:
        echo("Grammalecte v{}".format(gce.version))
    oDict = gce.getDictionary()
    oTokenizer = tkz.Tokenizer("fr")
    oLexGraphe = lxg.Lexicographe(oDict)
    if xArgs.textformatter or xArgs.textformatteronly:
        oTF = tf.TextFormatter()
    # Informational listings exit immediately.
    if xArgs.list_options or xArgs.list_rules:
        if xArgs.list_options:
            gce.displayOptions("fr")
        if xArgs.list_rules:
            gce.displayRules(None if xArgs.list_rules == "*" else xArgs.list_rules)
        exit()
    # Context only makes sense for JSON output.
    if not xArgs.json:
        xArgs.context = False
    gce.setOptions({"html": True, "latex": True})
    if xArgs.opt_on:
        gce.setOptions({opt: True for opt in xArgs.opt_on if opt in gce.getOptions()})
    if xArgs.opt_off:
        gce.setOptions({opt: False for opt in xArgs.opt_off if opt in gce.getOptions()})
    if xArgs.rule_off:
        for sRule in xArgs.rule_off:
            gce.ignoreRule(sRule)
    sFile = xArgs.file or xArgs.file_to_file
    if sFile:
        # File processing: results go to *.res.txt when -ff is used
        # (and always on Windows, where -f behaves like -ff).
        hDst = open(sFile[:sFile.rfind(".")] + ".res.txt", "w", encoding="utf-8") if xArgs.file_to_file or sys.platform == "win32" else None
        bComma = False
        if xArgs.json:
            # Opening of the JSON envelope; entries are comma-separated below.
            output('{ "grammalecte": "' + gce.version + '", "lang": "' + gce.lang + '", "data" : [\n', hDst)
        if not xArgs.concat_lines:
            # No line concatenation: each line is a paragraph of its own.
            for i, sText in enumerate(readfile(sFile), 1):
                if xArgs.textformatter or xArgs.textformatteronly:
                    sText = oTF.formatText(sText)
                if xArgs.textformatteronly:
                    output(sText, hDst)
                else:
                    if xArgs.json:
                        sText = generateJSON(i, sText, oTokenizer, oDict, bContext=xArgs.context, bDebug=False, bEmptyIfNoErrors=xArgs.only_when_errors, bReturnText=xArgs.textformatter)
                    else:
                        sText = generateText(sText, oTokenizer, oDict, bDebug=False, bEmptyIfNoErrors=xArgs.only_when_errors, nWidth=xArgs.width)
                    if sText:
                        if xArgs.json and bComma:
                            output(",\n", hDst)
                        output(sText, hDst)
                        bComma = True
                if hDst:
                    # Progress indicator while writing to file.
                    echo("§ %d\r" % i, end="", flush=True)
        else:
            # Concatenate lines not separated by an empty line into paragraphs.
            for i, lLine in enumerate(readfileAndConcatLines(sFile), 1):
                sText, lLineSet = txt.createParagraphWithLines(lLine)
                if xArgs.json:
                    sText = generateJSON(i, sText, oTokenizer, oDict, bContext=xArgs.context, bDebug=False, bEmptyIfNoErrors=xArgs.only_when_errors, lLineSet=lLineSet)
                else:
                    sText = generateText(sText, oTokenizer, oDict, bDebug=False, bEmptyIfNoErrors=xArgs.only_when_errors, nWidth=xArgs.width)
                if sText:
                    if xArgs.json and bComma:
                        output(",\n", hDst)
                    output(sText, hDst)
                    bComma = True
                if hDst:
                    echo("§ %d\r" % i, end="", flush=True)
        if xArgs.json:
            output("\n]}\n", hDst)
    else:
        # pseudo-console
        sInputText = "\n~==========~ Enter your text [/h /q] ~==========~\n"
        sText = _getText(sInputText)
        while True:
            if sText.startswith("?"):
                # "?word …": show the morphological analysis of each word.
                for sWord in sText[1:].strip().split():
                    if sWord:
                        echo("* {}".format(sWord))
                        for sMorph in oDict.getMorph(sWord):
                            echo(" {:<32} {}".format(sMorph, oLexGraphe.formatTags(sMorph)))
            elif sText.startswith("/+ "):
                gce.setOptions({opt: True for opt in sText[3:].strip().split() if opt in gce.getOptions()})
                echo("done")
            elif sText.startswith("/- "):
                gce.setOptions({opt: False for opt in sText[3:].strip().split() if opt in gce.getOptions()})
                echo("done")
            elif sText.startswith("/-- "):
                for sRule in sText[3:].strip().split():
                    gce.ignoreRule(sRule)
                echo("done")
            elif sText.startswith("/++ "):
                for sRule in sText[3:].strip().split():
                    gce.reactivateRule(sRule)
                echo("done")
            elif sText == "/debug" or sText == "/d":
                xArgs.debug = not (xArgs.debug)
                echo("debug mode on" if xArgs.debug else "debug mode off")
            elif sText == "/textformatter" or sText == "/tf":
                xArgs.textformatter = not (xArgs.textformatter)
                # BUGFIX: echo the textformatter state, not the debug state.
                echo("textformatter on" if xArgs.textformatter else "textformatter off")
            elif sText == "/help" or sText == "/h":
                echo(_HELP)
            elif sText == "/lopt" or sText == "/l":
                gce.displayOptions("fr")
            elif sText.startswith("/lr"):
                # Optional regex filter after "/lr".
                sText = sText.strip()
                sFilter = sText[sText.find(" "):].strip() if sText != "/lr" and sText != "/rules" else None
                gce.displayRules(sFilter)
            elif sText == "/quit" or sText == "/q":
                break
            elif sText.startswith("/rl"):
                # reload (todo)
                pass
            else:
                # BUGFIX: format and check each paragraph, not the whole input.
                for sParagraph in txt.getParagraph(sText):
                    if xArgs.textformatter:
                        sParagraph = oTF.formatText(sParagraph)
                    sRes = generateText(sParagraph, oTokenizer, oDict, bDebug=xArgs.debug, bEmptyIfNoErrors=xArgs.only_when_errors, nWidth=xArgs.width)
                    if sRes:
                        echo("\n" + sRes)
                    else:
                        echo("\nNo error found.")
            sText = _getText(sInputText)
dToken['sValue']): aSpellErrs.append(dToken) if bEmptyIfNoErrors and not aGrammErrs and not aSpellErrs: return "" return " " + json.dumps( { "iParagraph": iParagraph, "lGrammarErrors": aGrammErrs, "lSpellingErrors": aSpellErrs }, ensure_ascii=False) if __name__ == '__main__': gce.load() echo("Grammalecte v{}".format(gce.version)) dServerOptions = getServerOptions() dGCOptions = getConfigOptions("fr") if dGCOptions: gce.setOptions(dGCOptions) dServerGCOptions = gce.getOptions() echo("Grammar options:\n" + " | ".join( [k + ": " + str(v) for k, v in sorted(dServerGCOptions.items())])) oDict = gce.getDictionary() oTokenizer = tkz.Tokenizer("fr") oTF = tf.TextFormatter() dUser = {} userGenerator = genUserId() app = Bottle()
def setUpClass(cls):
    """Prepare fixtures shared by the whole test class.

    Loads the grammar-checker engine once, compiles the pattern that
    matches the {{...}} error markers embedded in test sentences, and
    starts an empty record of the rules exercised so far.
    """
    # One engine load for all tests in the class.
    gce.load()
    zMarker = re.compile(r"\{\{.*?\}\}")
    cls._zError = zMarker
    cls._aRuleTested = set()