Code example #1
def main ():
    xParser = argparse.ArgumentParser()
    xParser.add_argument("-f", "--file", help="parse file (UTF-8 required!) [on Windows, -f is similar to -ff]", type=str)
    xParser.add_argument("-ff", "--file_to_file", help="parse file (UTF-8 required!) and create a result file (*.res.txt)", type=str)
    xParser.add_argument("-j", "--json", help="generate list of errors in JSON", action="store_true")
    xParser.add_argument("-w", "--width", help="width in characters (40 < width < 200; default: 100)", type=int, choices=range(40,201,10), default=100)
    xParser.add_argument("-tf", "--textformatter", help="auto-format text according to typographical rules", action="store_true")
    xParser.add_argument("-tfo", "--textformatteronly", help="auto-format text and disable grammar checking (only with option 'file' or 'file_to_file')", action="store_true")
    xArgs = xParser.parse_args()

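    # initialise the grammar checker, its dictionary, tokenizer, lexicographer and (optionally) the text formatter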
    gce.load()
    gce.setOptions({"html": True})
    oDict = gce.getDictionary()
    oTokenizer = tkz.Tokenizer("fr")
    oLexGraphe = lxg.Lexicographe(oDict)
    if xArgs.textformatter or xArgs.textformatteronly:
        oTF = tf.TextFormatter()

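    # check the clipboard content paragraph by paragraph and write the result back to the clipboard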
    sText = clipboard.get()
    bDebug = False
    sRes = ""
    for sParagraph in txt.getParagraph(sText):
        if xArgs.textformatter:
            sParagraph = oTF.formatText(sParagraph)
        sRes = generateText(0, sParagraph, oTokenizer, oDict, xArgs.json, nWidth=xArgs.width, bDebug=bDebug, bEmptyIfNoErrors=True)
        if sRes:
            clipboard.set(sRes)
        else:
            clipboard.set("No errors found.")
    print(sRes)
Code example #2
def main():
    '''Read the file and run grammalecte on it'''
    # Load grammalecte.
    gce.load()
    dictionary = gce.getDictionary()
    tokenizer = tkz.Tokenizer("fr")

    # Read input from stdin or first arg.
    text_input = [line for line in fileinput.input()]
    text, lineset = txt.createParagraphWithLines(list(enumerate(text_input)))

    # Grammar errors
    gramm_err = gce.parse(text, "FR", bDebug=False, bContext=True)

    # Spelling errors
    spell_err = []
    for token in tokenizer.genTokens(text):
        if token['sType'] == "WORD" and not dictionary.isValidToken(
                token['sValue']):
            spell_err.append(token)

    # Get columns and lines.
    gramm_err, spell_err = txt.convertToXY(gramm_err, spell_err, lineset)

    # Output
    for i in list(gramm_err):
        print('grammaire|{}|{}|{}\n'.format(i['nStartY'] + 1, i['nStartX'] + 1,
                                            i['sMessage']))
    for i in list(spell_err):
        print('orthographe|{}|{}|{}\n'.format(i['nStartY'] + 1,
                                              i['nStartX'] + 1,
                                              'Mot absent du dictionnaire'))
Code example #3
File: cli.py Project: seeschloss/grammalecte
def main ():
    xParser = argparse.ArgumentParser()
    xParser.add_argument("-d", "--debug", help="display text transformation and disambiguation", action="store_true")
    xParser.add_argument("-p", "--parse", help="parse and display sentence structure", action="store_true")
    xParser.add_argument("-v", "--validate", help="validate text only", action="store_true")
    xParser.add_argument("-a", "--autocorrect", help="try to correct automatically", action="store_true")
    xParser.add_argument("-i", "--ignore-rule", help="ignore this rule (can be used more than once)", action="append", default=[])
    xParser.add_argument("-tf", "--textformatter", help="auto-format text", action="store_true")
    xArgs = xParser.parse_args()

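    # load the grammar checker and the resources it needs (dictionary, tokenizer, lexicographer)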
    gce.load()
    gce.setOptions({"html": True})
    oDict = gce.getDictionary()
    oTokenizer = tzr.Tokenizer("fr")
    oLexGraphe = lxg.Lexicographe(oDict)

    if xArgs.textformatter:
        oTF = tf.TextFormatter()

    sInputText = "> "
    sText = _getText(sInputText)

    errors = False

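    # prompt loop: with --parse, display the morphology of each word; otherwise run the checker and report errors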
    while sText:
        if xArgs.parse:
            for sWord in sText.split():
                if sWord:
                    echo("* {}".format(sWord))
                    for sMorph in oDict.getMorph(sWord):
                        echo("  {:<32} {}".format(sMorph, oLexGraphe.formatTags(sMorph)))
        else:
            if xArgs.textformatter:
                sText = oTF.formatText(sText)
                sys.stdout.write(sText)

            res = parser(sText, oTokenizer, oDict, bDebug=xArgs.debug, aIgnoredRules=xArgs.ignore_rule)

            if xArgs.validate:
                if res:
                    errors = True
            else:
                if res:
                    showResult(sText, res, xArgs.autocorrect)
                    errors = True
                else:
                    echo("No error found")

        sText = _getText(sInputText)

    if errors:
        sys.exit(1)
Code example #4
File: fr_test.py Project: maelvls/grammalecte
def perf(sVersion):
    print("\nPerformance tests")
    gce.load()
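    # warm-up parse: as the French sentence itself says, it only forces rule compilation before the timings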
    aErrs = gce.parse(
        "Texte sans importance… utile pour la compilation des règles avant le calcul des perfs."
    )

    with open("./tests/fr/perf.txt", "r", encoding="utf-8") as hSrc, \
         open("./tests/fr/perf_memo.txt", "a", encoding="utf-8") as hDst:
        hDst.write("{:<12}{:<20}".format(sVersion,
                                         time.strftime("%Y.%m.%d %H:%M")))
        for sText in (s.strip() for s in hSrc
                      if not s.startswith("#") and s.strip()):
            with timeblock(sText[:sText.find(".")], hDst):
                aErrs = gce.parse(sText)
        hDst.write("\n")
Code example #5
File: Grammalecte.py Project: seeschloss/grammalecte
 def __init__ (self, ctx, *args):
     self.ctx = ctx
     self.ServiceName = "com.sun.star.linguistic2.Proofreader"
     self.ImplementationName = "org.openoffice.comp.pyuno.Lightproof." + gce.pkg
     self.SupportedServiceNames = (self.ServiceName, )
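     # build one Locale object per locale declared by the grammar checker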
     self.locales = []
     for i in gce.locales:
         l = gce.locales[i]
         self.locales.append(Locale(l[0], l[1], l[2]))
     self.locales = tuple(self.locales)
     xCurCtx = uno.getComponentContext()
     # init
     gce.load()
     # GC options
     # opt_handler.load(xCurCtx)
     dOpt = Options.load(xCurCtx)
     gce.setOptions(dOpt)
     # store for results of big paragraphs
     self.dResult = {}
     self.nMaxRes = 1500
     self.lLastRes = deque(maxlen=self.nMaxRes)
     self.nRes = 0
Code example #6
File: cli.py Project: mrmen/machange
def main ():
    xParser = argparse.ArgumentParser()
    xParser.add_argument("-f", "--file", help="parse file (UTF-8 required!) [on Windows, -f is similar to -ff]", type=str)
    xParser.add_argument("-ff", "--file_to_file", help="parse file (UTF-8 required!) and create a result file (*.res.txt)", type=str)
    xParser.add_argument("-j", "--json", help="generate list of errors in JSON", action="store_true")
    xParser.add_argument("-w", "--width", help="width in characters (40 < width < 200; default: 100)", type=int, choices=range(40,201,10), default=100)
    xParser.add_argument("-tf", "--textformatter", help="auto-format text according to typographical rules", action="store_true")
    xParser.add_argument("-tfo", "--textformatteronly", help="auto-format text and disable grammar checking (only with option 'file' or 'file_to_file')", action="store_true")
    xArgs = xParser.parse_args()

    gce.load()
    gce.setOptions({"html": True})
    echo("Grammalecte v{}".format(gce.version))
    oDict = gce.getDictionary()
    oTokenizer = tkz.Tokenizer("fr")
    oLexGraphe = lxg.Lexicographe(oDict)
    if xArgs.textformatter or xArgs.textformatteronly:
        oTF = tf.TextFormatter()

    sFile = xArgs.file or xArgs.file_to_file
    if sFile:
        # file processing
        hDst = open(sFile[:sFile.rfind(".")]+".res.txt", "w", encoding="utf-8")  if xArgs.file_to_file or sys.platform == "win32"  else None
        bComma = False
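        # in JSON mode, all paragraph results are wrapped in a single JSON document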
        if xArgs.json:
            output('{ "grammalecte": "'+gce.version+'", "lang": "'+gce.lang+'", "data" : [\n', hDst)
        for i, sText in enumerate(readfile(sFile), 1):
            if xArgs.textformatter or xArgs.textformatteronly:
                sText = oTF.formatText(sText)
            if xArgs.textformatteronly:
                output(sText, hDst)
            else:
                sText = generateText(i, sText, oTokenizer, oDict, xArgs.json, nWidth=xArgs.width)
                if sText:
                    if xArgs.json and bComma:
                        output(",\n", hDst)
                    output(sText, hDst)
                    bComma = True
            if hDst:
                echo("§ %d\r" % i, end="", flush=True)
        if xArgs.json:
            output("\n]}\n", hDst)
    else:
        # pseudo-console
        sInputText = "\n~==========~ Enter your text [/h /q] ~==========~\n"
        sText = _getText(sInputText)
        bDebug = False
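        # interactive loop: '?word' shows morphology, '/' commands change options, anything else is checked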
        while True:
            if sText.startswith("?"):
                for sWord in sText[1:].strip().split():
                    if sWord:
                        echo("* {}".format(sWord))
                        for sMorph in oDict.getMorph(sWord):
                            echo("  {:<32} {}".format(sMorph, oLexGraphe.formatTags(sMorph)))
            elif sText.startswith("/+"):
                gce.setOptions({ opt:True  for opt in sText[2:].strip().split()  if opt in gce.getOptions() })
            elif sText.startswith("/-"):
                gce.setOptions({ opt:False  for opt in sText[2:].strip().split()  if opt in gce.getOptions() })
            elif sText == "/debug" or sText == "/d":
                bDebug = not(bDebug)
                echo("debug mode on"  if bDebug  else "debug mode off")
            elif sText == "/help" or sText == "/h":
                echo(_HELP)
            elif sText == "/lopt" or sText == "/l":
                echo("\n".join( [ k+":\t"+str(v)  for k, v  in sorted(gce.getOptions().items()) ] ))
            elif sText == "/quit" or sText == "/q":
                break
            elif sText.startswith("/rl"):
                # reload (todo)
                pass
            else:
                for sParagraph in txt.getParagraph(sText):
                    if xArgs.textformatter:
                        sParagraph = oTF.formatText(sParagraph)
                    sRes = generateText(0, sParagraph, oTokenizer, oDict, xArgs.json, nWidth=xArgs.width, bDebug=bDebug, bEmptyIfNoErrors=True)
                    if sRes:
                        echo("\n" + sRes)
                    else:
                        echo("\nNo error found.")
            sText = _getText(sInputText)
Code example #7
def main ():
    xParser = argparse.ArgumentParser()
    xParser.add_argument("-f", "--file", help="parse file (UTF-8 required!) [on Windows, -f is similar to -ff]", type=str)
    xParser.add_argument("-ff", "--file_to_file", help="parse file (UTF-8 required!) and create a result file (*.res.txt)", type=str)
    xParser.add_argument("-d", "--debug", help="display text transformation and disambiguation", action="store_true")
    xParser.add_argument("-w", "--width", help="width in characters (40 < width < 200; default: 100)", type=int, choices=range(40,201,10), default=100)
    xParser.add_argument("-tf", "--textformatter", help="auto-format text", action="store_true")
    xArgs = xParser.parse_args()

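    # on Windows, -f falls back to -ff: results are written to a .res.txt file instead of the console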
    if sys.platform == "win32" and xArgs.file:
        xArgs.file_to_file = xArgs.file
        xArgs.file = None

    gce.load()
    gce.setOptions({"html": True})
    echo("Grammalecte v{}".format(gce.version))
    oDict = gce.getDictionary()
    oTokenizer = tzr.Tokenizer("fr")
    oLexGraphe = lxg.Lexicographe(oDict)
    if xArgs.textformatter:
        oTF = tf.TextFormatter()

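    # three modes: check a file to the console, check a file into a *.res.txt file, or an interactive prompt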
    if xArgs.file:
        if os.path.isfile(xArgs.file):
            with open(xArgs.file, "r", encoding="utf-8") as hSrc:
                for sText in hSrc:
                    if xArgs.textformatter:
                        sText = oTF.formatText(sText)
                    echo(parser(sText, oTokenizer, oDict, nWidth=xArgs.width, bDebug=xArgs.debug))
        else:
            print("# Error: file not found.")
    elif xArgs.file_to_file:
        if os.path.isfile(xArgs.file_to_file):
            with open(xArgs.file_to_file, "r", encoding="utf-8") as hSrc, \
                 open(xArgs.file_to_file[:xArgs.file_to_file.rfind(".")]+".res.txt", "w", encoding="utf-8") as hDst:
                for i, sText in enumerate(hSrc, 1):
                    if xArgs.textformatter:
                        sText = oTF.formatText(sText)
                    hDst.write(parser(sText, oTokenizer, oDict, nWidth=xArgs.width, bDebug=xArgs.debug))
                    print("§ %d\r" % i, end="", flush=True)
        else:
            print("# Error: file not found.")
    else:
        sInputText = "\n~==========~ Écrivez votre texte [Entrée pour quitter] ~==========~\n"
        sText = _getText(sInputText)
        while sText:
            if sText.startswith("?"):
                for sWord in sText[1:].split():
                    if sWord:
                        echo("* {}".format(sWord))
                        for sMorph in oDict.getMorph(sWord):
                            echo("  {:<32} {}".format(sMorph, oLexGraphe.formatTags(sMorph)))
            elif sText == "rl":
                # reload (todo)
                pass
            else:
                if xArgs.textformatter:
                    sText = oTF.formatText(sText)
                res = parser(sText, oTokenizer, oDict, nWidth=xArgs.width, bDebug=xArgs.debug, bEmptyIfNoErrors=True)
                echo("\n"+res  if res  else "\nNo error found.")
            sText = _getText(sInputText)
Code example #8
def main():
    xParser = argparse.ArgumentParser()
    xParser.add_argument("-f", "--file", help="parse file (UTF-8 required!) [on Windows, -f is similar to -ff]", type=str)
    xParser.add_argument("-ff", "--file_to_file", help="parse file (UTF-8 required!) and create a result file (*.res.txt)", type=str)
    xParser.add_argument("-owe", "--only_when_errors", help="display results only when there are errors", action="store_true")
    xParser.add_argument("-j", "--json", help="generate list of errors in JSON (only with option --file or --file_to_file)", action="store_true")
    xParser.add_argument("-cl", "--concat_lines", help="concatenate lines not separated by an empty paragraph (only with option --file or --file_to_file)", action="store_true")
    xParser.add_argument("-tf", "--textformatter", help="auto-format text according to typographical rules (unavailable with option --concat_lines)", action="store_true")
    xParser.add_argument("-tfo", "--textformatteronly", help="auto-format text and disable grammar checking (only with option --file or --file_to_file)", action="store_true")
    xParser.add_argument("-ctx", "--context", help="return errors with context (only with option --json)", action="store_true")
    xParser.add_argument("-w", "--width", help="width in characters (40 < width < 200; default: 100)", type=int, choices=range(40, 201, 10), default=100)
    xParser.add_argument("-lo", "--list_options", help="list options", action="store_true")
    xParser.add_argument("-lr", "--list_rules", nargs="?", help="list rules [regex pattern as filter]", const="*")
    xParser.add_argument("-on", "--opt_on", nargs="+", help="activate options")
    xParser.add_argument("-off", "--opt_off", nargs="+", help="deactivate options")
    xParser.add_argument("-roff", "--rule_off", nargs="+", help="deactivate rules")
    xParser.add_argument("-d", "--debug", help="debugging mode (only in interactive mode)", action="store_true")
    xArgs = xParser.parse_args()

    gce.load()
    if not xArgs.json:
        echo("Grammalecte v{}".format(gce.version))
    oDict = gce.getDictionary()
    oTokenizer = tkz.Tokenizer("fr")
    oLexGraphe = lxg.Lexicographe(oDict)
    if xArgs.textformatter or xArgs.textformatteronly:
        oTF = tf.TextFormatter()

    if xArgs.list_options or xArgs.list_rules:
        if xArgs.list_options:
            gce.displayOptions("fr")
        if xArgs.list_rules:
            gce.displayRules(None if xArgs.list_rules == "*" else xArgs.list_rules)
        exit()

    if not xArgs.json:
        xArgs.context = False

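    # enable HTML/LaTeX handling, then apply the option and rule changes requested on the command line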
    gce.setOptions({"html": True, "latex": True})
    if xArgs.opt_on:
        gce.setOptions(
            {opt: True
             for opt in xArgs.opt_on if opt in gce.getOptions()})
    if xArgs.opt_off:
        gce.setOptions(
            {opt: False
             for opt in xArgs.opt_off if opt in gce.getOptions()})

    if xArgs.rule_off:
        for sRule in xArgs.rule_off:
            gce.ignoreRule(sRule)

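    # batch mode when a file is given, interactive pseudo-console otherwise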
    sFile = xArgs.file or xArgs.file_to_file
    if sFile:
        # file processing
        hDst = open(
            sFile[:sFile.rfind(".")] + ".res.txt", "w", encoding="utf-8"
        ) if xArgs.file_to_file or sys.platform == "win32" else None
        bComma = False
        if xArgs.json:
            output(
                '{ "grammalecte": "' + gce.version + '", "lang": "' +
                gce.lang + '", "data" : [\n', hDst)
        if not xArgs.concat_lines:
            # no concatenation of lines
            for i, sText in enumerate(readfile(sFile), 1):
                if xArgs.textformatter or xArgs.textformatteronly:
                    sText = oTF.formatText(sText)
                if xArgs.textformatteronly:
                    output(sText, hDst)
                else:
                    if xArgs.json:
                        sText = generateJSON(
                            i,
                            sText,
                            oTokenizer,
                            oDict,
                            bContext=xArgs.context,
                            bDebug=False,
                            bEmptyIfNoErrors=xArgs.only_when_errors,
                            bReturnText=xArgs.textformatter)
                    else:
                        sText = generateText(
                            sText,
                            oTokenizer,
                            oDict,
                            bDebug=False,
                            bEmptyIfNoErrors=xArgs.only_when_errors,
                            nWidth=xArgs.width)
                    if sText:
                        if xArgs.json and bComma:
                            output(",\n", hDst)
                        output(sText, hDst)
                        bComma = True
                if hDst:
                    echo("§ %d\r" % i, end="", flush=True)
        else:
            # concatenate lines that are not separated by an empty line
            for i, lLine in enumerate(readfileAndConcatLines(sFile), 1):
                sText, lLineSet = txt.createParagraphWithLines(lLine)
                if xArgs.json:
                    sText = generateJSON(
                        i,
                        sText,
                        oTokenizer,
                        oDict,
                        bContext=xArgs.context,
                        bDebug=False,
                        bEmptyIfNoErrors=xArgs.only_when_errors,
                        lLineSet=lLineSet)
                else:
                    sText = generateText(
                        sText,
                        oTokenizer,
                        oDict,
                        bDebug=False,
                        bEmptyIfNoErrors=xArgs.only_when_errors,
                        nWidth=xArgs.width)
                if sText:
                    if xArgs.json and bComma:
                        output(",\n", hDst)
                    output(sText, hDst)
                    bComma = True
                if hDst:
                    echo("§ %d\r" % i, end="", flush=True)
        if xArgs.json:
            output("\n]}\n", hDst)
    else:
        # pseudo-console
        sInputText = "\n~==========~ Enter your text [/h /q] ~==========~\n"
        sText = _getText(sInputText)
        while True:
            if sText.startswith("?"):
                for sWord in sText[1:].strip().split():
                    if sWord:
                        echo("* {}".format(sWord))
                        for sMorph in oDict.getMorph(sWord):
                            echo("  {:<32} {}".format(
                                sMorph, oLexGraphe.formatTags(sMorph)))
            elif sText.startswith("/+ "):
                gce.setOptions({
                    opt: True
                    for opt in sText[3:].strip().split()
                    if opt in gce.getOptions()
                })
                echo("done")
            elif sText.startswith("/- "):
                gce.setOptions({
                    opt: False
                    for opt in sText[3:].strip().split()
                    if opt in gce.getOptions()
                })
                echo("done")
            elif sText.startswith("/-- "):
                for sRule in sText[3:].strip().split():
                    gce.ignoreRule(sRule)
                echo("done")
            elif sText.startswith("/++ "):
                for sRule in sText[3:].strip().split():
                    gce.reactivateRule(sRule)
                echo("done")
            elif sText == "/debug" or sText == "/d":
                xArgs.debug = not (xArgs.debug)
                echo("debug mode on" if xArgs.debug else "debug mode off")
            elif sText == "/textformatter" or sText == "/tf":
                xArgs.textformatter = not (xArgs.textformatter)
                echo("textformatter on" if xArgs.textformatter else "textformatter off")
            elif sText == "/help" or sText == "/h":
                echo(_HELP)
            elif sText == "/lopt" or sText == "/l":
                gce.displayOptions("fr")
            elif sText.startswith("/lr"):
                sText = sText.strip()
                sFilter = sText[sText.find(" "):].strip() if sText != "/lr" and sText != "/rules" else None
                gce.displayRules(sFilter)
            elif sText == "/quit" or sText == "/q":
                break
            elif sText.startswith("/rl"):
                # reload (todo)
                pass
            else:
                for sParagraph in txt.getParagraph(sText):
                    if xArgs.textformatter:
                        sParagraph = oTF.formatText(sParagraph)
                    sRes = generateText(
                        sParagraph,
                        oTokenizer,
                        oDict,
                        bDebug=xArgs.debug,
                        bEmptyIfNoErrors=xArgs.only_when_errors,
                        nWidth=xArgs.width)
                    if sRes:
                        echo("\n" + sRes)
                    else:
                        echo("\nNo error found.")
            sText = _getText(sInputText)
Code example #9
                dToken['sValue']):
            aSpellErrs.append(dToken)
    if bEmptyIfNoErrors and not aGrammErrs and not aSpellErrs:
        return ""
    return "  " + json.dumps(
        {
            "iParagraph": iParagraph,
            "lGrammarErrors": aGrammErrs,
            "lSpellingErrors": aSpellErrs
        },
        ensure_ascii=False)


if __name__ == '__main__':

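    # load Grammalecte once at start-up and configure it from the server and grammar-checker options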
    gce.load()
    echo("Grammalecte v{}".format(gce.version))
    dServerOptions = getServerOptions()
    dGCOptions = getConfigOptions("fr")
    if dGCOptions:
        gce.setOptions(dGCOptions)
    dServerGCOptions = gce.getOptions()
    echo("Grammar options:\n" + " | ".join(
        [k + ": " + str(v) for k, v in sorted(dServerGCOptions.items())]))
    oDict = gce.getDictionary()
    oTokenizer = tkz.Tokenizer("fr")
    oTF = tf.TextFormatter()
    dUser = {}
    userGenerator = genUserId()

    app = Bottle()
Code example #10
File: fr_test.py Project: maelvls/grammalecte
 def setUpClass(cls):
     gce.load()
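     # regex for the {{…}} error markers in the test data, plus a set recording which rules the tests have covered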
     cls._zError = re.compile(r"\{\{.*?\}\}")
     cls._aRuleTested = set()