Beispiel #1
0
def main():
    '''Read input text and print Grammalecte findings on stdout.'''
    # Initialise the grammar engine, its dictionary and the tokenizer.
    gce.load()
    dictionary = gce.getDictionary()
    tokenizer = tkz.Tokenizer("fr")

    # Collect every input line from stdin or the file arguments.
    text_input = list(fileinput.input())
    text, lineset = txt.createParagraphWithLines(list(enumerate(text_input)))

    # Grammar check over the joined paragraph.
    gramm_err = gce.parse(text, "FR", bDebug=False, bContext=True)

    # Spelling check: keep each word token the dictionary rejects.
    spell_err = [
        token for token in tokenizer.genTokens(text)
        if token['sType'] == "WORD"
        and not dictionary.isValidToken(token['sValue'])
    ]

    # Translate flat text offsets into (line, column) coordinates.
    gramm_err, spell_err = txt.convertToXY(gramm_err, spell_err, lineset)

    # Report one pipe-separated record per error.
    for err in gramm_err:
        print('grammaire|{}|{}|{}\n'.format(err['nStartY'] + 1,
                                            err['nStartX'] + 1,
                                            err['sMessage']))
    for err in spell_err:
        print('orthographe|{}|{}|{}\n'.format(err['nStartY'] + 1,
                                              err['nStartX'] + 1,
                                              'Mot absent du dictionnaire'))
Beispiel #2
0
    def __call_grammalecte(self) -> List[GrammalecteError]:
        """
        Prepare and call Grammalecte.

        :return: The list (may be empty) of errors.
        """
        config = self._requester.get_config()
        if config is None:
            raise _IgnoredException

        # Configure the engine: analysis options first, then rule exclusions.
        checker = GrammarChecker("fr")
        checker.getGCEngine().setOptions(
            config.get_value(GrammalecteConfig.ANALYZE_OPTIONS))
        for rule in config.get_all_values(GrammalecteConfig.IGNORED_RULES):
            checker.gce.ignoreRule(rule)

        # Check each paragraph in turn and accumulate every error found.
        errors: List[GrammalecteError] = []
        paragraphs = _SingleAnalyzer.__createParagraphs(
            self._requester.get_text(),
            config.get_value(GrammalecteConfig.CONCAT_LINES))
        for text, lineDefinition in paragraphs:
            grammErrs, spellErrs = checker.getParagraphErrors(
                text, bContext=True, bSpellSugg=True)
            grammErrs, spellErrs = Text.convertToXY(
                grammErrs, spellErrs, lineDefinition)
            errors += GrammalecteError.buildErrorList(grammErrs)
            errors += GrammalecteError.buildErrorList(spellErrs)
        return errors
Beispiel #3
0
def find_errors(input_file, opts=None):
    """Read *input_file* and run grammalecte on it.

    :param input_file: path of the text file to check.
    :param opts: optional dict of flags: "border", "filters", "no_gramm",
        "no_spell", "no_apos", "no_nbsp", "no_esp".
    :return: list of error records sorted by line then column.
    """
    # Bug fix: a mutable default argument ({}) is shared across calls;
    # normalise the None sentinel here instead.
    if opts is None:
        opts = {}

    with open(input_file, "r") as f:
        lines = f.readlines()

    border = opts.get("border")
    if not border:
        # No borders, simply join text lines ("" and None both mean none,
        # so the former extra `border == ""` test was redundant).
        document_offset = 0
        raw_text = "".join(lines)
        debug("No border to detect")
    else:
        debug(str(border))
        document_offset, raw_text = _compute_offset(lines, border)
        debug("Border found at {}".format(document_offset))

    # Cleanup text by redacting all matching patterns.
    for pattern in opts.get("filters", []):
        raw_text = _redact_text(re.compile(pattern), raw_text)
    debug(raw_text)
    text_input = raw_text.splitlines()

    text, lineset = txt.createParagraphWithLines(list(enumerate(text_input)))

    do_gramm = not opts.get("no_gramm", False)
    do_spell = not opts.get("no_spell", False)
    # Two distinct lists: the former chained assignment aliased both names
    # to the same list object.
    gramm_err = []
    spell_err = []

    # Load grammalecte.
    gc = grammalecte.GrammarChecker("fr")

    # Compute grammar and spell check errors
    if do_gramm:
        gc.gce.setOption("apos", not opts.get("no_apos", False))
        gc.gce.setOption("nbsp", not opts.get("no_nbsp", False))
        gc.gce.setOption("esp", not opts.get("no_esp", False))
        # NOTE(review): "tab" is driven by the "no_esp" key (there is no
        # "no_tab" key); kept as in the original — confirm it is intended.
        gc.gce.setOption("tab", not opts.get("no_esp", False))
        gramm_err = gc.gce.parse(text, "FR", bDebug=False)

    if do_spell:
        spell_err = gc.oSpellChecker.parseParagraph(text, True)

    # Get columns and lines.
    gramm_err, spell_err = txt.convertToXY(gramm_err, spell_err, lineset)

    if do_gramm:
        final_errors = _prepare_gramm_errors(gramm_err, document_offset,
                                             text_input)
    else:
        final_errors = []

    if do_spell:
        final_errors += _prepare_spell_errors(spell_err, document_offset)

    return sorted(final_errors, key=itemgetter(2, 4))
Beispiel #4
0
def generateJSON(iIndex,
                 sText,
                 oTokenizer,
                 oDict,
                 bContext=False,
                 bDebug=False,
                 bEmptyIfNoErrors=False,
                 lLineSet=None,
                 bReturnText=False):
    """Serialize the paragraph's grammar and spelling errors as JSON."""
    aGrammErrs, aSpellErrs = _getErrors(sText, oTokenizer, oDict, bContext,
                                        bDebug)
    # Caller may ask for an empty string instead of an error-free payload.
    if bEmptyIfNoErrors and not aGrammErrs and not aSpellErrs:
        return ""
    # Build the payload once; key insertion order matches the original
    # output exactly in every branch.
    if lLineSet:
        # With a line set, coordinates are remapped and the paragraph
        # index (and text) are omitted.
        aGrammErrs, aSpellErrs = txt.convertToXY(aGrammErrs, aSpellErrs,
                                                 lLineSet)
        payload = {
            "lGrammarErrors": aGrammErrs,
            "lSpellingErrors": aSpellErrs
        }
    elif bReturnText:
        payload = {
            "iParagraph": iIndex,
            "sText": sText,
            "lGrammarErrors": aGrammErrs,
            "lSpellingErrors": aSpellErrs
        }
    else:
        payload = {
            "iParagraph": iIndex,
            "lGrammarErrors": aGrammErrs,
            "lSpellingErrors": aSpellErrs
        }
    return json.dumps(payload, ensure_ascii=False)
Beispiel #5
0
def main(files, opts=None):
    """Read the given files and print Grammalecte errors on stdout.

    :param files: file names handed to ``fileinput.input`` (stdin if empty).
    :param opts: optional dict of flags: "no_gramm", "no_spell", "no_apos",
        "no_nbsp", "no_esp".  A check stays enabled unless its flag is
        present with a value other than False.
    """
    # Bug fix: a mutable default argument ({}) is shared across calls;
    # normalise the None sentinel here instead.
    if opts is None:
        opts = {}

    def _enabled(flag):
        # Exactly equivalent to the original
        # `flag not in opts or opts[flag] is False` test.
        return opts.get(flag, False) is False

    # Read input from stdin or first arg.
    text_input = list(fileinput.input(files=files))
    text, lineset = txt.createParagraphWithLines(list(enumerate(text_input)))

    do_gramm = _enabled("no_gramm")
    do_spell = _enabled("no_spell")
    # Two distinct lists: the former chained assignment aliased both names
    # to the same list object.
    gramm_err = []
    spell_err = []

    # Load grammalecte.
    gc = grammalecte.GrammarChecker("fr")

    # Compute grammar and spell check errors
    if do_gramm:
        gc.gce.setOption("apos", _enabled("no_apos"))
        gc.gce.setOption("nbsp", _enabled("no_nbsp"))
        gc.gce.setOption("esp", _enabled("no_esp"))
        # NOTE(review): "tab" is driven by the "no_esp" key (there is no
        # "no_tab" key); kept as in the original — confirm it is intended.
        gc.gce.setOption("tab", _enabled("no_esp"))

        gramm_err = gc.gce.parse(text, "FR", bDebug=False)

    if do_spell:
        spell_err = gc.oSpellChecker.parseParagraph(text, False)

    # Get columns and lines.
    gramm_err, spell_err = txt.convertToXY(gramm_err, spell_err, lineset)

    org_keywords = [
        "author", "caption", "category", "creator", "date", "email", "header",
        "keywords", "language", "name", "options", "title", "attr_.+"
    ]

    # Output
    if do_gramm:
        org_re = re.compile("^#\\+(?:{})\\:$".format("|".join(org_keywords)),
                            re.IGNORECASE)
        for i in gramm_err:
            cur_line = text_input[i["nStartY"]]
            if i["sType"] == "esp":
                # Remove useless space warning for visual paragraph in
                # text modes
                next_line_no = i["nStartY"] + 1
                # Bug fix: ">=" instead of ">".  With ">" an error on the
                # last line let next_line_no == len(text_input) fall
                # through and raise IndexError below.
                if next_line_no >= len(text_input):
                    next_line = ""
                else:
                    next_line = text_input[next_line_no].strip()
                if cur_line[i["nStartX"]] == "\n" and next_line == "":
                    continue
            elif i["sType"] == "nbsp":
                # Remove some unwanted nbsp warnings
                if cur_line[0:4] == "#-*-":
                    continue
                # Slicing is safe even if i["nStartX"] + 1 > len(cur_line).
                m = org_re.match(cur_line[0:i["nStartX"] + 1])
                if m is not None and m.start() == 0:
                    continue
            print("grammaire|{}|{}|{}\n".format(i["nStartY"] + 1,
                                                i["nStartX"] + 1,
                                                i["sMessage"]))

    if do_spell:
        # Hoisted out of the loop: the pattern does not depend on the error.
        org_re = re.compile("(?:{})\\:".format("|".join(org_keywords)),
                            re.IGNORECASE)
        for i in spell_err:
            cur_line = text_input[i["nStartY"]]
            # Skip words that are org-mode keywords at this position.
            m = org_re.match(cur_line, i["nStartX"])
            if m is not None and m.start() == i["nStartX"]:
                continue
            print("orthographe|{}|{}|{}\n".format(
                i["nStartY"] + 1, i["nStartX"] + 1,
                "Mot absent du dictionnaire"))