def countGroupInRegex(sRegex):
    "return the number of capturing groups in <sRegex>, or 0 if the pattern doesn't compile"
    try:
        # Pattern.groups is the number of capturing groups in the compiled regex
        return re.compile(sRegex).groups
    except re.error:
        # only catch genuine regex syntax errors; a bare except would also
        # swallow KeyboardInterrupt/SystemExit and hide real bugs
        traceback.print_exc()
        echo(sRegex)
        return 0
def getServerOptions():
    "read server options from [server_options._global.ini]; exit the process if missing/unreadable"
    # ConfigParser replaces SafeConfigParser, which was deprecated in 3.2
    # and removed in Python 3.12
    xConfig = configparser.ConfigParser()
    try:
        xConfig.read("server_options._global.ini")
        dOpt = xConfig._sections['options']  # raw section dict, as the callers expect
    except (configparser.Error, KeyError, OSError):
        # narrowed from a bare except: parsing errors, missing [options]
        # section, or unreadable file all abort the server startup
        echo("Options file [server_options._global.ini] not found or not readable")
        exit()
    return dOpt
def test_parse(self):
    "run the grammar checker on every line of tests/fr/gc_test.txt and compare found vs expected errors"
    # a test line looks like: <linenum padded to 10 chars>[__option__ ]text with {{errors}}[ ->> "suggestions"]
    zOption = re.compile("^__([a-zA-Z0-9]+)__ ")
    bShowUntested = False
    for sf in ["gc_test.txt"]:
        with self.subTest(msg=sf):
            with open("./tests/fr/" + sf, "r", encoding="utf-8") as hSrc:
                # skip comments and blank lines
                for sLine in (s for s in hSrc if not s.startswith("#") and s.strip()):
                    sLineNum = sLine[:10].strip()  # first 10 chars hold the source line number
                    sLine = sLine[10:].strip()
                    sOption = None
                    m = zOption.search(sLine)
                    if m:
                        # the test targets a specific grammar option
                        sLine = sLine[m.end():]
                        sOption = m.group(1)
                    if "->>" in sLine:
                        # expected suggestions follow the "->>" separator
                        sErrorText, sExceptedSuggs = self._splitTestLine(sLine)
                        if sExceptedSuggs.startswith('"') and sExceptedSuggs.endswith('"'):
                            sExceptedSuggs = sExceptedSuggs[1:-1]
                    else:
                        sErrorText = sLine.strip()
                        sExceptedSuggs = ""
                    sExpectedErrors = self._getExpectedErrors(sErrorText)
                    # strip the {{...}} markers to get the raw text to check
                    sTextToCheck = sErrorText.replace("}}", "").replace("{{", "")
                    sFoundErrors, sListErr, sFoundSuggs = self._getFoundErrors(sTextToCheck, sOption)
                    self.assertEqual(sExpectedErrors, sFoundErrors, \
                                     "\n# Line num: " + sLineNum + \
                                     "\n> to check: " + _fuckBackslashUTF8(sTextToCheck) + \
                                     "\n expected: " + sExpectedErrors + \
                                     "\n found: " + sFoundErrors + \
                                     "\n errors: \n" + sListErr)
                    if sExceptedSuggs:
                        self.assertEqual(sExceptedSuggs, sFoundSuggs,
                                         "\n# Line num: " + sLineNum +
                                         "\n> to check: " + _fuckBackslashUTF8(sTextToCheck) +
                                         "\n errors: \n" + sListErr)
        # NOTE(review): indentation reconstructed from a mangled source — this flag
        # appears to be set once per processed test file; confirm against upstream
        bShowUntested = True
    if bShowUntested:
        # report rules that no test exercised (rule ids like "12s"/"34p" are auto-named and skipped)
        i = 0
        for sOpt, sLineId, sRuleId in gce.listRules():
            if sLineId not in self._aRuleTested and not re.match("[0-9]+[sp]$", sRuleId):
                echo(sRuleId, end=", ")
                i += 1
        if i:
            echo("\n[{} untested rules]".format(i))
def regex2js(sRegex): "converts Python regex to JS regex and returns JS regex and list of negative lookbefore assertions" # Latin letters: http://unicode-table.com/fr/ # 0-9 and _ # A-Z # a-z # À-Ö 00C0-00D6 (upper case) # Ø-ß 00D8-00DF (upper case) # à-ö 00E0-00F6 (lower case) # ø-ÿ 00F8-00FF (lower case) # Ā-ʯ 0100-02AF (mixed) # -> a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯ bCaseInsensitive = False if "(?i)" in sRegex: sRegex = sRegex.replace("(?i)", "") bCaseInsensitive = True lNegLookBeforeRegex = [] if WORDLIMITLEFT in sRegex: sRegex = sRegex.replace(WORDLIMITLEFT, "") lNegLookBeforeRegex = ["[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯ.,–-]$"] sRegex = sRegex.replace("[\\w", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯ") sRegex = sRegex.replace("\\w", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯ]") sRegex = sRegex.replace("[.]", r"\.") if not sRegex.startswith("<js>"): sRegex = sRegex.replace("/", r"\/") m = re.search( r"\(\?<!([^()]+)\)", sRegex ) # Negative lookbefore assertion should always be at the beginning of regex if m: lNegLookBeforeRegex.append(m.group(1) + "$") sRegex = sRegex.replace(m.group(0), "") if "(?<" in sRegex: echo("# Warning. Lookbefore assertion not changed in:\n ") echo(sRegex) if sRegex.startswith("<js>"): sRegex = sRegex.replace('<js>', '/').replace('</js>i', '/ig').replace('</js>', '/g') else: sRegex = "/" + sRegex + "/g" if bCaseInsensitive and not sRegex.endswith("/ig"): sRegex = sRegex + "i" if not lNegLookBeforeRegex: lNegLookBeforeRegex = None return (sRegex, lNegLookBeforeRegex)
def getConfigOptions(sLang):
    "read grammar-checker options from [server_options.<sLang>.ini]; exit the process on error"
    # ConfigParser replaces SafeConfigParser, which was deprecated in 3.2
    # and removed in Python 3.12
    xConfig = configparser.ConfigParser()
    try:
        xConfig.read("server_options." + sLang + ".ini")
    except (configparser.Error, OSError):
        echo("Options file [server_options." + sLang + ".ini] not found or not readable")
        exit()
    try:
        # options are stored as "0"/"1" strings — convert them to booleans
        dGCOpt = { k: bool(int(v)) for k, v in xConfig._sections['gc_options'].items() }
    except (KeyError, ValueError):
        # missing [gc_options] section or a non-integer value
        echo("Error in options file [server_options." + sLang + ".ini]. Dropped.")
        traceback.print_exc()
        exit()
    return dGCOpt
def makePhonetTable(sp, bJS=False):
    "build phonetic similarity data files: a Python module, plus a JSON file when <bJS>"
    print("make phonet tables")
    try:
        oDict = ibdawg.IBDAWG("French.bdic")
    except Exception:
        # dictionary unavailable: nothing can be built without it
        traceback.print_exc()
        return
    with open(sp + "/data/phonet_simil.txt", 'r', encoding='utf-8') as hSrc:
        # set of homophonic words: one sorted word list per non-comment line
        lSet = []
        for sLine in hSrc.readlines():
            if not sLine.startswith("#") and sLine.strip():
                lSet.append(sorted(sLine.strip().split()))
    # dictionary of words -> index of their homophony set
    dWord = {}
    for i, aSet in enumerate(lSet):
        for sWord in aSet:
            if oDict.lookup(sWord):
                dWord[sWord] = i  # warning, what if word in several sets?
            else:
                echo("Mot inconnu : " + sWord)
    # dictionary of morphologies
    dMorph = {}
    for sWord in dWord:
        dMorph[sWord] = oDict.getMorph(sWord)
    # write file for Python — use a context manager so the handle is flushed
    # and closed deterministically (the original leaked an open file object)
    sCode = "# generated data (do not edit)\n\n" + \
            "dWord = " + str(dWord) + "\n\n" + \
            "lSet = " + str(lSet) + "\n\n" + \
            "dMorph = " + str(dMorph) + "\n"
    with open(sp + "/modules/phonet_data.py", "w", encoding="utf-8") as hDst:
        hDst.write(sCode)
    if bJS:
        ## write file for JavaScript
        sCode = "{\n" + \
                ' "dWord": ' + json.dumps(dWord, ensure_ascii=False) + ",\n" + \
                ' "lSet": ' + json.dumps(lSet, ensure_ascii=False) + ",\n" + \
                ' "dMorph": ' + json.dumps(dMorph, ensure_ascii=False) + "\n}"
        with open(sp + "/modules-js/phonet_data.json", "w", encoding="utf-8") as hDst:
            hDst.write(sCode)
def createFirefoxExtension(sLang, dVars, bLaunchFx=False):
    "create extension for Firefox"
    # builds the XPI under _build/xpi/<sLang> by copying the xpi skeleton and
    # the JS engine, templating option pages/locales, then invoking jpm
    print("Building extension for Firefox")
    eraseFolder("_build/xpi/" + sLang)
    dir_util.copy_tree("gc_lang/" + sLang + "/xpi/", "_build/xpi/" + sLang)
    dir_util.copy_tree("grammalecte-js", "_build/xpi/" + sLang + "/grammalecte")
    sHTML, dProperties = createOptionsForXPI(dVars)
    dVars['optionsHTML'] = sHTML
    # template the about panel in place (source and destination are the same file)
    copyAndFileTemplate("_build/xpi/" + sLang + "/data/about_panel.html",
                        "_build/xpi/" + sLang + "/data/about_panel.html", dVars)
    for sLocale in dProperties.keys():
        spfLocale = "_build/xpi/" + sLang + "/locale/" + sLocale + ".properties"
        if os.path.exists(spfLocale):
            copyAndFileTemplate(spfLocale, spfLocale, dProperties)
        else:
            echo("Locale file not found: " + spfLocale)
    # package (and optionally run) the extension from inside the build folder
    with cd("_build/xpi/" + sLang):
        os.system("jpm xpi")
        if bLaunchFx:
            os.system("jpm run -b nightly")
def displayStats(lParagraphRules, lSentenceRules):
    "print per-category action counts and rule totals for paragraph and sentence rules"
    # header: one column per action category
    echo(" {:>18} {:>18} {:>18} {:>18}".format("DISAMBIGUATOR", "TEXT PROCESSOR", "GRAMMAR CHECKING", "REGEX"))
    # paragraph-level rules (prefixed with §)
    dParagraphCount, nParagraphRule = _calcRulesStats(lParagraphRules)
    echo("§ {:>10} actions {:>10} actions {:>10} actions in {:>8} rules".format(
        dParagraphCount['='], dParagraphCount['~'], dParagraphCount['-'], nParagraphRule))
    # sentence-level rules (prefixed with s)
    dSentenceCount, nSentenceRule = _calcRulesStats(lSentenceRules)
    echo("s {:>10} actions {:>10} actions {:>10} actions in {:>8} rules".format(
        dSentenceCount['='], dSentenceCount['~'], dSentenceCount['-'], nSentenceRule))
def main ():
    "interactive loop: read text from stdin, check it, and exit non-zero when errors were found"
    xParser = argparse.ArgumentParser()
    xParser.add_argument("-d", "--debug", help="display text transformation and disambiguation", action="store_true")
    xParser.add_argument("-p", "--parse", help="parse and display sentence structure", action="store_true")
    xParser.add_argument("-v", "--validate", help="validate text only", action="store_true")
    xParser.add_argument("-a", "--autocorrect", help="try to correct automatically", action="store_true")
    xParser.add_argument("-i", "--ignore-rule", help="ignore this rule (can be used more than once)", action="append", default=[])
    xParser.add_argument("-tf", "--textformatter", help="auto-format text", action="store_true")
    xArgs = xParser.parse_args()
    gce.load()
    gce.setOptions({"html": True})
    oDict = gce.getDictionary()
    oTokenizer = tzr.Tokenizer("fr")
    oLexGraphe = lxg.Lexicographe(oDict)
    if xArgs.textformatter:
        oTF = tf.TextFormatter()
    sInputText = "> "
    sText = _getText(sInputText)
    errors = False  # becomes True once any error is reported; drives the exit status
    while sText:
        if xArgs.parse:
            # morphology mode: show tags for each word instead of checking grammar
            for sWord in sText.split():
                if sWord:
                    echo("* {}".format(sWord))
                    for sMorph in oDict.getMorph(sWord):
                        echo(" {:<32} {}".format(sMorph, oLexGraphe.formatTags(sMorph)))
        else:
            if xArgs.textformatter:
                # NOTE(review): indentation reconstructed — the echo of the formatted
                # text is assumed to belong to the formatter branch; confirm upstream
                sText = oTF.formatText(sText)
                sys.stdout.write(sText)
            res = parser(sText, oTokenizer, oDict, bDebug=xArgs.debug, aIgnoredRules=xArgs.ignore_rule)
            if xArgs.validate:
                # validate-only: remember that errors exist, print nothing
                if res:
                    errors = True
            else:
                if res:
                    showResult(sText, res, xArgs.autocorrect)
                    errors = True
                else:
                    echo("No error found")
        sText = _getText(sInputText)
    if errors:
        sys.exit(1)
def prepareOptions(lOptionLines):
    "returns a dictionary with data about options"
    # parses OPTGROUP/ OPTSOFTWARE: OPT/ OPTLANG/ OPTLABEL/ directive lines
    # from the rule files into option structure, defaults and labels
    sLang = ""
    lStructOpt = []
    lOpt = []
    dOptLabel = {}
    for sLine in lOptionLines:
        sLine = sLine.strip()
        if sLine.startswith("OPTGROUP/"):
            # group name + comma-separated lists of option names
            m = re.match("OPTGROUP/([a-z0-9]+):(.+)$", sLine)
            lStructOpt.append( (m.group(1), list(map(str.split, m.group(2).split(",")))) )
        elif sLine.startswith("OPTSOFTWARE:"):
            lOpt = [[s, {}] for s in sLine[12:].strip().split()]  # don’t use tuples (s, {}), because unknown to JS
        elif sLine.startswith("OPT/"):
            # one default value per software listed on the OPTSOFTWARE line
            m = re.match("OPT/([a-z0-9]+):(.+)$", sLine)
            for i, sOpt in enumerate(m.group(2).split()):
                # NOTE(review): eval() on rule-file content — only acceptable because
                # option files are trusted build inputs, never user-supplied data
                lOpt[i][1][m.group(1)] = eval(sOpt)
        elif sLine.startswith("OPTLANG/"):
            # start a new label table for this language (e.g. "fr" from "fr_FR")
            m = re.match("OPTLANG/([a-z][a-z](?:_[A-Z][A-Z]|)):(.+)$", sLine)
            sLang = m.group(1)[:2]
            dOptLabel[sLang] = {"__optiontitle__": m.group(2).strip()}
        elif sLine.startswith("OPTLABEL/"):
            # label and optional help text separated by "|"
            m = re.match("OPTLABEL/([a-z0-9]+):(.+)$", sLine)
            dOptLabel[sLang][m.group(1)] = list(map(str.strip, m.group(2).split("|"))) if "|" in m.group(2) else [m.group(2).strip(), ""]
        else:
            echo("# Error. Wrong option line in:\n ")
            echo(sLine)
    echo(" options defined for: " + ", ".join([t[0] for t in lOpt]))
    dOptions = {"lStructOpt": lStructOpt, "dOptLabel": dOptLabel}
    dOptions.update({"dOpt" + k: v for k, v in lOpt})
    return dOptions
def main():
    "CLI entry point: parse arguments, then either process a file or run the interactive pseudo-console"
    xParser = argparse.ArgumentParser()
    xParser.add_argument("-f", "--file", help="parse file (UTF-8 required!) [on Windows, -f is similar to -ff]", type=str)
    xParser.add_argument("-ff", "--file_to_file", help="parse file (UTF-8 required!) and create a result file (*.res.txt)", type=str)
    xParser.add_argument("-owe", "--only_when_errors", help="display results only when there are errors", action="store_true")
    xParser.add_argument("-j", "--json", help="generate list of errors in JSON (only with option --file or --file_to_file)", action="store_true")
    xParser.add_argument("-cl", "--concat_lines", help="concatenate lines not separated by an empty paragraph (only with option --file or --file_to_file)", action="store_true")
    xParser.add_argument("-tf", "--textformatter", help="auto-format text according to typographical rules (unavailable with option --concat_lines)", action="store_true")
    xParser.add_argument("-tfo", "--textformatteronly", help="auto-format text and disable grammar checking (only with option --file or --file_to_file)", action="store_true")
    xParser.add_argument("-ctx", "--context", help="return errors with context (only with option --json)", action="store_true")
    xParser.add_argument("-w", "--width", help="width in characters (40 < width < 200; default: 100)", type=int, choices=range(40, 201, 10), default=100)
    xParser.add_argument("-lo", "--list_options", help="list options", action="store_true")
    xParser.add_argument("-lr", "--list_rules", nargs="?", help="list rules [regex pattern as filter]", const="*")
    xParser.add_argument("-on", "--opt_on", nargs="+", help="activate options")
    xParser.add_argument("-off", "--opt_off", nargs="+", help="deactivate options")
    xParser.add_argument("-roff", "--rule_off", nargs="+", help="deactivate rules")
    xParser.add_argument("-d", "--debug", help="debugging mode (only in interactive mode)", action="store_true")
    xArgs = xParser.parse_args()
    gce.load()
    if not xArgs.json:
        echo("Grammalecte v{}".format(gce.version))
    oDict = gce.getDictionary()
    oTokenizer = tkz.Tokenizer("fr")
    oLexGraphe = lxg.Lexicographe(oDict)
    if xArgs.textformatter or xArgs.textformatteronly:
        # NOTE(review): oTF stays undefined when formatting is later enabled
        # via /tf in the console without -tf on the command line
        oTF = tf.TextFormatter()
    if xArgs.list_options or xArgs.list_rules:
        # informational modes: print and quit
        if xArgs.list_options:
            gce.displayOptions("fr")
        if xArgs.list_rules:
            gce.displayRules(None if xArgs.list_rules == "*" else xArgs.list_rules)
        exit()
    if not xArgs.json:
        xArgs.context = False
    gce.setOptions({"html": True, "latex": True})
    if xArgs.opt_on:
        gce.setOptions({opt: True for opt in xArgs.opt_on if opt in gce.getOptions()})
    if xArgs.opt_off:
        gce.setOptions({opt: False for opt in xArgs.opt_off if opt in gce.getOptions()})
    if xArgs.rule_off:
        for sRule in xArgs.rule_off:
            gce.ignoreRule(sRule)
    sFile = xArgs.file or xArgs.file_to_file
    if sFile:
        # file processing: write to a .res.txt file when -ff (or on Windows), else to stdout
        hDst = open(sFile[:sFile.rfind(".")] + ".res.txt", "w", encoding="utf-8") if xArgs.file_to_file or sys.platform == "win32" else None
        bComma = False  # JSON: whether a separator is needed before the next entry
        if xArgs.json:
            output('{ "grammalecte": "' + gce.version + '", "lang": "' + gce.lang + '", "data" : [\n', hDst)
        if not xArgs.concat_lines:
            # pas de concaténation des lignes
            for i, sText in enumerate(readfile(sFile), 1):
                if xArgs.textformatter or xArgs.textformatteronly:
                    sText = oTF.formatText(sText)
                if xArgs.textformatteronly:
                    output(sText, hDst)
                else:
                    if xArgs.json:
                        sText = generateJSON(i, sText, oTokenizer, oDict, bContext=xArgs.context, bDebug=False, bEmptyIfNoErrors=xArgs.only_when_errors, bReturnText=xArgs.textformatter)
                    else:
                        sText = generateText(sText, oTokenizer, oDict, bDebug=False, bEmptyIfNoErrors=xArgs.only_when_errors, nWidth=xArgs.width)
                    if sText:
                        if xArgs.json and bComma:
                            output(",\n", hDst)
                        output(sText, hDst)
                        bComma = True
                if hDst:
                    # progress indicator on the console while writing to file
                    echo("§ %d\r" % i, end="", flush=True)
        else:
            # concaténation des lignes non séparées par une ligne vide
            for i, lLine in enumerate(readfileAndConcatLines(sFile), 1):
                sText, lLineSet = txt.createParagraphWithLines(lLine)
                if xArgs.json:
                    sText = generateJSON(i, sText, oTokenizer, oDict, bContext=xArgs.context, bDebug=False, bEmptyIfNoErrors=xArgs.only_when_errors, lLineSet=lLineSet)
                else:
                    sText = generateText(sText, oTokenizer, oDict, bDebug=False, bEmptyIfNoErrors=xArgs.only_when_errors, nWidth=xArgs.width)
                if sText:
                    if xArgs.json and bComma:
                        output(",\n", hDst)
                    output(sText, hDst)
                    bComma = True
                if hDst:
                    echo("§ %d\r" % i, end="", flush=True)
        if xArgs.json:
            output("\n]}\n", hDst)
    else:
        # pseudo-console
        sInputText = "\n~==========~ Enter your text [/h /q] ~==========~\n"
        sText = _getText(sInputText)
        while True:
            if sText.startswith("?"):
                # morphology lookup for each word after "?"
                for sWord in sText[1:].strip().split():
                    if sWord:
                        echo("* {}".format(sWord))
                        for sMorph in oDict.getMorph(sWord):
                            echo(" {:<32} {}".format(sMorph, oLexGraphe.formatTags(sMorph)))
            elif sText.startswith("/+ "):
                gce.setOptions({ opt: True for opt in sText[3:].strip().split() if opt in gce.getOptions() })
                echo("done")
            elif sText.startswith("/- "):
                gce.setOptions({ opt: False for opt in sText[3:].strip().split() if opt in gce.getOptions() })
                echo("done")
            elif sText.startswith("/-- "):
                for sRule in sText[3:].strip().split():
                    gce.ignoreRule(sRule)
                echo("done")
            elif sText.startswith("/++ "):
                for sRule in sText[3:].strip().split():
                    gce.reactivateRule(sRule)
                echo("done")
            elif sText == "/debug" or sText == "/d":
                xArgs.debug = not (xArgs.debug)
                echo("debug mode on" if xArgs.debug else "debug mode off")
            elif sText == "/textformatter" or sText == "/tf":
                xArgs.textformatter = not (xArgs.textformatter)
                # BUG FIX: the status message previously tested xArgs.debug,
                # reporting the wrong state after toggling the text formatter
                echo("textformatter on" if xArgs.textformatter else "textformatter off")
            elif sText == "/help" or sText == "/h":
                echo(_HELP)
            elif sText == "/lopt" or sText == "/l":
                gce.displayOptions("fr")
            elif sText.startswith("/lr"):
                sText = sText.strip()
                sFilter = sText[sText.find(" "):].strip() if sText != "/lr" and sText != "/rules" else None
                gce.displayRules(sFilter)
            elif sText == "/quit" or sText == "/q":
                break
            elif sText.startswith("/rl"):
                # reload (todo)
                pass
            else:
                for sParagraph in txt.getParagraph(sText):
                    # BUG FIX: the original formatted and checked the whole input
                    # (sText) on every iteration instead of the current paragraph,
                    # corrupting multi-paragraph input
                    if xArgs.textformatter:
                        sParagraph = oTF.formatText(sParagraph)
                    sRes = generateText(sParagraph, oTokenizer, oDict, bDebug=xArgs.debug, bEmptyIfNoErrors=xArgs.only_when_errors, nWidth=xArgs.width)
                    if sRes:
                        echo("\n" + sRes)
                    else:
                        echo("\nNo error found.")
            sText = _getText(sInputText)
def output(sText, hDst=None):
    "write <sText> to the destination handle when given, otherwise echo it without a trailing newline"
    if hDst:
        hDst.write(sText)
    else:
        echo(sText, end="")
def output (sText, hDst=None):
    "write <sText> to <hDst> when given, otherwise echo it without a trailing newline"
    # NOTE(review): this duplicates another output() defined elsewhere in this
    # file — consider keeping a single definition
    if not hDst:
        echo(sText, end="")
    else:
        hDst.write(sText)
def createAction(sIdAction, sAction, nGroup):
    "returns an action to perform as a tuple (condition, action type, action[, iGroup [, message, URL ]])"
    # <sAction> has the shape: [condition] <c><group>>> payload
    # where <c> is - (error), ~ (text preprocessor), = (disambiguator), > (break)
    global FUNCTIONS
    m = re.search(r"([-~=>])(\d*|)>>", sAction)
    if not m:
        echo("# No action at line " + sIdAction)
        return None
    #### CONDITION
    sCondition = sAction[:m.start()].strip()
    if sCondition:
        # the condition becomes a generated function named "c<sIdAction>"
        sCondition = prepareFunction(sCondition)
        FUNCTIONS.append(("c" + sIdAction, sCondition))
        # sanity check: group references must not exceed the regex's group count
        for x in re.finditer("[.](?:group|start|end)[(](\d+)[)]", sCondition):
            if int(x.group(1)) > nGroup:
                print("# Error in groups in condition at line " + sIdAction + " (" + str(nGroup) + " groups only)")
        if ".match" in sCondition:
            echo("# Error. JS compatibility. Don't use .match() in condition, use .search()")
        sCondition = "c" + sIdAction
    else:
        sCondition = None
    #### iGroup / positioning
    iGroup = int(m.group(2)) if m.group(2) else 0
    if iGroup > nGroup:
        echo("# Selected group > group number in regex at line " + sIdAction)
    #### ACTION
    sAction = sAction[m.end():].strip()
    cAction = m.group(1)
    if cAction == "-":
        ## error: split "replacement # message[|url]"
        iMsg = sAction.find(" # ")
        sMsg = sAction[iMsg + 3:].strip()
        sAction = sAction[:iMsg].strip()
        sURL = ""
        mURL = re.search("[|] *(https?://.*)", sMsg)
        if mURL:
            sURL = mURL.group(1).strip()
            sMsg = sMsg[:mURL.start(0)].strip()
        if sMsg[0:1] == "=":
            # computed message: becomes a generated function "m<sIdAction>"
            sMsg = prepareFunction(sMsg[1:])
            FUNCTIONS.append(("m" + sIdAction, sMsg))
            for x in re.finditer("group[(](\d+)[)]", sMsg):
                if int(x.group(1)) > nGroup:
                    print("# error in groups in message at line " + sIdAction + " (" + str(nGroup) + " groups only)")
            sMsg = "=m" + sIdAction
        else:
            # literal message: \N back-references must stay within group count
            for x in re.finditer(r"\\(\d+)", sMsg):
                if int(x.group(1)) > nGroup:
                    print("# error in groups in message at line " + sIdAction + " (" + str(nGroup) + " groups only)")
            if re.search("[.]\\w+[(]", sMsg):
                print("# error in message at line " + sIdAction + ": This message looks like code. Line should begin with =")
    if sAction[0:1] == "=" or cAction == "=":
        # computed replacement / disambiguation code
        if "define" in sAction and not re.search(r"define\(\\\d+ *, *\[.*\] *\)", sAction):
            print("# error in action at line " + sIdAction + ": second argument for define must be a list of strings")
        sAction = prepareFunction(sAction)
        sAction = sAction.replace("m.group(i[4])", "m.group(" + str(iGroup) + ")")
        for x in re.finditer("group[(](\d+)[)]", sAction):
            if int(x.group(1)) > nGroup:
                print("# error in groups in replacement at line " + sIdAction + " (" + str(nGroup) + " groups only)")
    else:
        # literal replacement: check \N back-references
        for x in re.finditer(r"\\(\d+)", sAction):
            if int(x.group(1)) > nGroup:
                print("# error in groups in replacement at line " + sIdAction + " (" + str(nGroup) + " groups only)")
        if re.search("[.]\\w+[(]", sAction):
            print("# error in action at line " + sIdAction + ": This action looks like code. Line should begin with =")
    if cAction == "-":
        ## error detected
        if not sAction:
            print("# error in action at line " + sIdAction + ": This action is empty.")
        if sAction[0:1] == "=":
            # computed suggestion: generated function "s<sIdAction>"
            FUNCTIONS.append(("s" + sIdAction, sAction[1:]))
            sAction = "=s" + sIdAction
        elif sAction.startswith('"') and sAction.endswith('"'):
            sAction = sAction[1:-1]
        if not sMsg:
            print("# error in action at line " + sIdAction + ": the message is empty.")
        return [sCondition, cAction, sAction, iGroup, sMsg, sURL]
    elif cAction == "~":
        ## text preprocessor
        if not sAction:
            print("# error in action at line " + sIdAction + ": This action is empty.")
        if sAction[0:1] == "=":
            FUNCTIONS.append(("p" + sIdAction, sAction[1:]))
            sAction = "=p" + sIdAction
        elif sAction.startswith('"') and sAction.endswith('"'):
            sAction = sAction[1:-1]
        return [sCondition, cAction, sAction, iGroup]
    elif cAction == "=":
        ## disambiguator: always a generated function "d<sIdAction>"
        if sAction[0:1] == "=":
            sAction = sAction[1:]
        if not sAction:
            print("# error in action at line " + sIdAction + ": This action is empty.")
        FUNCTIONS.append(("d" + sIdAction, sAction))
        sAction = "d" + sIdAction
        return [sCondition, cAction, sAction]
    elif cAction == ">":
        ## no action, break loop if condition is False
        return [sCondition, cAction, ""]
    else:
        echo("# Unknown action at line " + sIdAction)
        return None
def createRule(s, nIdLine, sLang, bParagraph):
    "returns rule as list [option name, regex, bCaseInsensitive, identifier, list of actions]"
    global JSREGEXES
    #### OPTIONS
    sLineId = str(nIdLine) + ("p" if bParagraph else "s")
    sRuleId = sLineId
    sOption = False  # False or [a-z0-9]+ name
    tGroups = None  # code for groups positioning (only useful for JavaScript)
    cCaseMode = 'i'  # i: case insensitive, s: case sensitive, u: uppercasing allowed
    cWordLimitLeft = '['  # [: word limit, <: no specific limit
    cWordLimitRight = ']'  # ]: word limit, >: no specific limit
    # rule header looks like: __<flags>[/option][(ruleid)]__ <regex> <<- actions
    m = re.match("^__([[<]\\w[]>])(/[a-zA-Z0-9]+|)(\\(\\w+\\)|)__ *", s)
    if m:
        cWordLimitLeft = m.group(1)[0]
        cCaseMode = m.group(1)[1]
        cWordLimitRight = m.group(1)[2]
        sOption = m.group(2)[1:] if m.group(2) else False
        if m.group(3):
            sRuleId = m.group(3)[1:-1]
        s = s[m.end(0):]
    else:
        echo("Warning. No option defined at line: " + sLineId)
    #### REGEX TRIGGER
    i = s.find(" <<-")
    if i == -1:
        print("# Error: no condition at line " + sLineId)
        return None
    sRegex = s[:i].strip()
    s = s[i + 4:]
    # JS groups positioning codes
    m = re.search("@@\\S+", sRegex)
    if m:
        tGroups = groupsPositioningCodeToList(sRegex[m.start() + 2:])
        sRegex = sRegex[:m.start()].strip()
    # JS regex: a JS-specific alternative regex, stored aside for the JS build
    m = re.search("<js>.+</js>i?", sRegex)
    if m:
        JSREGEXES[sLineId] = m.group(0)
        sRegex = sRegex[:m.start()].strip()
    if "<js>" in sRegex or "</js>" in sRegex:
        print("# Error: JavaScript regex not delimited at line " + sLineId)
        return None
    # quotes ?
    if sRegex.startswith('"') and sRegex.endswith('"'):
        sRegex = sRegex[1:-1]
    ## definitions: expand {name} placeholders declared with DEF:
    for sDef, sRepl in DEF.items():
        sRegex = sRegex.replace(sDef, sRepl)
    ## count number of groups (must be done before modifying the regex)
    nGroup = countGroupInRegex(sRegex)
    if nGroup > 0:
        if not tGroups:
            print("# warning: groups positioning code for JavaScript should be defined at line " + sLineId)
        else:
            if nGroup != len(tGroups):
                print("# error: groups positioning code irrelevant at line " + sLineId)
    ## word limit: add boundary markers unless the regex is already anchored
    if cWordLimitLeft == '[' and not sRegex.startswith(("^", '’', "'", ",")):
        sRegex = WORDLIMITLEFT + sRegex
    if cWordLimitRight == ']' and not sRegex.endswith(("$", '’', "'", ",")):
        sRegex = sRegex + WORDLIMITRIGHT
    ## casing mode
    if cCaseMode == "i":
        bCaseInsensitive = True
        if not sRegex.startswith("(?i)"):
            sRegex = "(?i)" + sRegex
    elif cCaseMode == "s":
        bCaseInsensitive = False
        sRegex = sRegex.replace("(?i)", "")
    elif cCaseMode == "u":
        # uppercasing allowed: rewrite the regex to also match uppercase variants
        bCaseInsensitive = False
        sRegex = sRegex.replace("(?i)", "")
        sRegex = uppercase(sRegex, sLang)
    else:
        print("# Unknown case mode [" + cCaseMode + "] at line " + sLineId)
    ## check regex
    try:
        z = re.compile(sRegex)
    except:
        print("# Regex error at line ", nIdLine)
        echo(sRegex)
        traceback.print_exc()
        return None
    ## groups in non grouping parenthesis
    for x in re.finditer("\(\?:[^)]*\([[\w -]", sRegex):
        print("# Warning: groups inside non grouping parenthesis in regex at line " + sLineId)
    #### PARSE ACTIONS
    lActions = []
    nAction = 1
    for sAction in s.split(" <<- "):
        t = createAction(sLineId + "_" + str(nAction), sAction, nGroup)
        nAction += 1
        if t:
            lActions.append(t)
    if not lActions:
        return None
    return [ sOption, sRegex, bCaseInsensitive, sLineId, sRuleId, lActions, tGroups ]
def main():
    "build entry point: generate dictionaries, data, packages and run tests for each requested lang"
    print("Python: " + sys.version)
    xParser = argparse.ArgumentParser()
    xParser.add_argument("lang", type=str, nargs='+', help="lang project to generate (name of folder in /lang)")
    xParser.add_argument("-b", "--build_data", help="launch build_data.py", action="store_true")
    xParser.add_argument("-d", "--dict", help="generate FSA dictionary", action="store_true")
    xParser.add_argument("-t", "--tests", help="run unit tests", action="store_true")
    xParser.add_argument("-p", "--perf", help="run performance tests", action="store_true")
    xParser.add_argument("-js", "--javascript", help="JavaScript build for Firefox", action="store_true")
    xParser.add_argument("-fx", "--firefox", help="Launch Firefox Nightly for XPI testing", action="store_true")
    xParser.add_argument("-i", "--install", help="install the extension in Writer (path of unopkg must be set in config.ini)", action="store_true")
    xArgs = xParser.parse_args()
    dir_util.mkpath("_build")
    for sLang in xArgs.lang:
        if os.path.exists("gc_lang/" + sLang) and os.path.isdir("gc_lang/" + sLang):
            xConfig = getConfig(sLang)
            dVars = xConfig._sections['args']  # per-language build variables from config.ini
            if xArgs.javascript:
                spXPIBuild = "_build/xpi/" + sLang
                dir_util.mkpath(spXPIBuild + "/data")
            # build data
            if xArgs.dict:
                # fsa builder: compile the lexicon into a binary dictionary
                oDAWG = fsa.DAWG(dVars['lexicon_src'], dVars['lang_name'], dVars['stemming_method'])
                oDAWG.writeInfo("grammalecte/_dictionaries/" + dVars['binary_dic'] + ".info.txt")
                oDAWG.createBinary("grammalecte/_dictionaries/" + dVars['binary_dic'], int(dVars['fsa_method']))
                if xArgs.javascript:
                    oDic = IBDAWG(dVars['binary_dic'])
                    #oDic.writeAsJSObject("gc_lang/"+sLang+"/modules-js/dictionary.js")
                    oDic.writeAsJSObject("grammalecte-js/_dictionaries/" + dVars['js_binary_dic'])
            if xArgs.build_data:
                # lang data: run the language's own build_data.main if importable
                try:
                    build_module = importlib.import_module("gc_lang." + sLang + ".build_data")
                except ImportError:
                    print("# Error. Couldn’t import file build_data.py in folder gc_lang/" + sLang)
                else:
                    build_module.main('gc_lang/' + sLang, xArgs.javascript)
            # make
            sVersion = create(sLang, xConfig, xArgs.install, xArgs.javascript, xArgs.firefox)
            # tests
            if xArgs.tests or xArgs.perf:
                print("> Running tests")
                try:
                    tests = importlib.import_module("tests." + sLang + "_test")
                    echo(tests.__file__)
                except ImportError:
                    print("# Error. Couldn't import file {}_test.py in folder tests".format(sLang))
                else:
                    if xArgs.tests:
                        xTestSuite = unittest.TestLoader().loadTestsFromModule(tests)
                        unittest.TextTestRunner().run(xTestSuite)
                    if xArgs.perf:
                        tests.perf(sVersion)
        else:
            print("Folder not found: gc_lang/" + sLang)
def main ():
    "CLI: check a file (-f / -ff) or run an interactive loop reading text from stdin"
    xParser = argparse.ArgumentParser()
    xParser.add_argument("-f", "--file", help="parse file (UTF-8 required!) [on Windows, -f is similar to -ff]", type=str)
    xParser.add_argument("-ff", "--file_to_file", help="parse file (UTF-8 required!) and create a result file (*.res.txt)", type=str)
    xParser.add_argument("-d", "--debug", help="display text transformation and disambiguation", action="store_true")
    xParser.add_argument("-w", "--width", help="width in characters (40 < width < 200; default: 100)", type=int, choices=range(40,201,10), default=100)
    xParser.add_argument("-tf", "--textformatter", help="auto-format text", action="store_true")
    xArgs = xParser.parse_args()
    if sys.platform == "win32" and xArgs.file:
        # on Windows, console output is unreliable for this tool: force file output
        xArgs.file_to_file = xArgs.file
        xArgs.file = None
    gce.load()
    gce.setOptions({"html": True})
    echo("Grammalecte v{}".format(gce.version))
    oDict = gce.getDictionary()
    oTokenizer = tzr.Tokenizer("fr")
    oLexGraphe = lxg.Lexicographe(oDict)
    if xArgs.textformatter:
        oTF = tf.TextFormatter()
    if xArgs.file:
        # check the file line by line, printing results to the console
        if os.path.isfile(xArgs.file):
            with open(xArgs.file, "r", encoding="utf-8") as hSrc:
                for sText in hSrc:
                    if xArgs.textformatter:
                        sText = oTF.formatText(sText)
                    echo(parser(sText, oTokenizer, oDict, nWidth=xArgs.width, bDebug=xArgs.debug))
        else:
            print("# Error: file not found.")
    elif xArgs.file_to_file:
        # check the file line by line, writing results to <name>.res.txt
        if os.path.isfile(xArgs.file_to_file):
            with open(xArgs.file_to_file, "r", encoding="utf-8") as hSrc, \
                 open(xArgs.file_to_file[:xArgs.file_to_file.rfind(".")]+".res.txt", "w", encoding="utf-8") as hDst:
                for i, sText in enumerate(hSrc, 1):
                    if xArgs.textformatter:
                        sText = oTF.formatText(sText)
                    hDst.write(parser(sText, oTokenizer, oDict, nWidth=xArgs.width, bDebug=xArgs.debug))
                    print("§ %d\r" % i, end="", flush=True)  # progress indicator
        else:
            print("# Error: file not found.")
    else:
        # interactive loop: empty input ends the session
        sInputText = "\n~==========~ Écrivez votre texte [Entrée pour quitter] ~==========~\n"
        sText = _getText(sInputText)
        while sText:
            if sText.startswith("?"):
                # morphology lookup for each word after "?"
                for sWord in sText[1:].split():
                    if sWord:
                        echo("* {}".format(sWord))
                        for sMorph in oDict.getMorph(sWord):
                            echo(" {:<32} {}".format(sMorph, oLexGraphe.formatTags(sMorph)))
            elif sText == "rl":
                # reload (todo)
                pass
            else:
                if xArgs.textformatter:
                    sText = oTF.formatText(sText)
                res = parser(sText, oTokenizer, oDict, nWidth=xArgs.width, bDebug=xArgs.debug, bEmptyIfNoErrors=True)
                echo("\n"+res if res else "\nNo error found.")
            sText = _getText(sInputText)
def make(lRules, sLang, bJavaScript):
    "compile rules"
    # removing comments, zeroing empty lines, creating definitions, storing tests, merging rule lines
    echo(" parsing rules...")
    global DEF
    lLine = []
    lRuleLine = []  # pairs [source line number, merged rule text]
    lTest = []
    lOpt = []
    for i, sLine in enumerate(lRules, 1):
        if sLine.startswith('#END'):
            # explicit end-of-rules marker
            break
        elif sLine.startswith("#"):
            pass
        elif sLine.startswith("DEF:"):
            # DEF: name replacement  ->  usable in regexes as {name}
            m = re.match("DEF: +([a-zA-Z_][a-zA-Z_0-9]*) +(.+)$", sLine.strip())
            if m:
                DEF["{" + m.group(1) + "}"] = m.group(2)
            else:
                print("Error in definition: ", end="")
                echo(sLine.strip())
        elif sLine.startswith("TEST:"):
            # store the test with its source line number padded to 8 chars
            lTest.append("{:<8}".format(i) + " " + sLine[5:].lstrip())
        elif sLine.startswith("TODO:"):
            pass
        elif sLine.startswith(("OPTGROUP/", "OPTSOFTWARE:", "OPT/", "OPTLANG/", "OPTLABEL/")):
            lOpt.append(sLine)
        elif re.match("[ \t]*$", sLine):
            pass
        elif sLine.startswith((" ", "\t")):
            # continuation line: merge with the previous rule line
            lRuleLine[len(lRuleLine) - 1][1] += " " + sLine.strip()
        else:
            lRuleLine.append([i, sLine.strip()])
    # generating options files
    echo(" parsing options...")
    dOptions = prepareOptions(lOpt)
    #echo(dOptions)
    # generating test files
    echo(" generating test files...")
    with open("tests/"+sLang+"/gc_test.txt", "w", encoding="utf-8") as hDstPy, \
         open("gc_lang/"+sLang+"/modules-js/tests_data.json", "w", encoding="utf-8") as hDstJS:
        hDstPy.write("# TESTS FOR LANG [" + sLang + "]\n\n")
        for sLine in lTest:
            hDstPy.write(sLine)
        hDstJS.write('{ "aData": ' + json.dumps(lTest, ensure_ascii=False) + " }\n")
    # processing
    echo(" preparing rules...")
    bParagraph = True  # rules before the "[++]" marker are paragraph rules
    lParagraphRules = []
    lSentenceRules = []
    lParagraphRulesJS = []
    lSentenceRulesJS = []
    for nLine, sLine in lRuleLine:
        if sLine:
            if sLine == "[++]":
                # switch from paragraph rules to sentence rules
                bParagraph = False
            else:
                aRule = createRule(sLine, nLine, sLang, bParagraph)
                if aRule:
                    if bParagraph:
                        lParagraphRules.append(aRule)
                        lParagraphRulesJS.append(pyRuleToJS(aRule))
                    else:
                        lSentenceRules.append(aRule)
                        lSentenceRulesJS.append(pyRuleToJS(aRule))
    # creating file with all functions callable by rules
    echo(" creating callables...")
    with open("gc_lang/"+sLang+"/modules/gc_tmp_eval.py", "w", encoding="utf-8") as hDstPy, \
         open("gc_lang/"+sLang+"/modules-js/gc_tmp_eval.js", "w", encoding="utf-8") as hDstJS:
        hDstPy.write("# generated code, do not edit\n")
        hDstJS.write("// generated code, do not edit\nconst oEvalFunc = {\n")
        for sFuncName, sReturn in FUNCTIONS:
            # the first letter of the generated function name encodes its role
            cType = sFuncName[0:1]
            if cType == "c":
                # condition
                sParams = "s, sx, m, dDA, sCountry, bCondMemo"
            elif cType == "m":
                # message
                sParams = "s, m"
            elif cType == "s":
                # suggestion
                sParams = "s, m"
            elif cType == "p":
                # preprocessor
                sParams = "s, m"
            elif cType == "d":
                # disambiguator
                sParams = "s, m, dDA"
            else:
                print("# Unknown function type in [" + sFuncName + "]")
                continue
            hDstPy.write("def {} ({}):\n".format(sFuncName, sParams))
            hDstPy.write(" return " + sReturn + "\n")
            hDstJS.write(" {}: function ({})".format(sFuncName, sParams) + " {\n")
            hDstJS.write(" return " + py2js(sReturn) + ";\n")
            hDstJS.write(" },\n")
        hDstJS.write("}\n")
    displayStats(lParagraphRules, lSentenceRules)
    # merged rule sets for the Python and JavaScript engines
    d = { "paragraph_rules": mergeRulesByOption(lParagraphRules),
          "sentence_rules": mergeRulesByOption(lSentenceRules),
          "paragraph_rules_JS": writeRulesToJSArray(mergeRulesByOption(lParagraphRulesJS)),
          "sentence_rules_JS": writeRulesToJSArray(mergeRulesByOption(lSentenceRulesJS)) }
    d.update(dOptions)
    return d
def main ():
    "CLI: check a file (-f / -ff, optionally as JSON) or run the interactive pseudo-console"
    xParser = argparse.ArgumentParser()
    xParser.add_argument("-f", "--file", help="parse file (UTF-8 required!) [on Windows, -f is similar to -ff]", type=str)
    xParser.add_argument("-ff", "--file_to_file", help="parse file (UTF-8 required!) and create a result file (*.res.txt)", type=str)
    xParser.add_argument("-j", "--json", help="generate list of errors in JSON", action="store_true")
    xParser.add_argument("-w", "--width", help="width in characters (40 < width < 200; default: 100)", type=int, choices=range(40,201,10), default=100)
    xParser.add_argument("-tf", "--textformatter", help="auto-format text according to typographical rules", action="store_true")
    xParser.add_argument("-tfo", "--textformatteronly", help="auto-format text and disable grammar checking (only with option 'file' or 'file_to_file')", action="store_true")
    xArgs = xParser.parse_args()
    gce.load()
    gce.setOptions({"html": True})
    echo("Grammalecte v{}".format(gce.version))
    oDict = gce.getDictionary()
    oTokenizer = tkz.Tokenizer("fr")
    oLexGraphe = lxg.Lexicographe(oDict)
    if xArgs.textformatter or xArgs.textformatteronly:
        oTF = tf.TextFormatter()
    sFile = xArgs.file or xArgs.file_to_file
    if sFile:
        # file processing: output to <name>.res.txt when -ff (or on Windows), else stdout
        hDst = open(sFile[:sFile.rfind(".")]+".res.txt", "w", encoding="utf-8") if xArgs.file_to_file or sys.platform == "win32" else None
        bComma = False  # JSON: whether a separator is needed before the next entry
        if xArgs.json:
            output('{ "grammalecte": "'+gce.version+'", "lang": "'+gce.lang+'", "data" : [\n', hDst)
        for i, sText in enumerate(readfile(sFile), 1):
            if xArgs.textformatter or xArgs.textformatteronly:
                sText = oTF.formatText(sText)
            if xArgs.textformatteronly:
                output(sText, hDst)
            else:
                sText = generateText(i, sText, oTokenizer, oDict, xArgs.json, nWidth=xArgs.width)
                if sText:
                    if xArgs.json and bComma:
                        output(",\n", hDst)
                    output(sText, hDst)
                    bComma = True
            if hDst:
                # progress indicator on the console while writing to file
                echo("§ %d\r" % i, end="", flush=True)
        if xArgs.json:
            output("\n]}\n", hDst)
    else:
        # pseudo-console
        sInputText = "\n~==========~ Enter your text [/h /q] ~==========~\n"
        sText = _getText(sInputText)
        bDebug = False
        while True:
            if sText.startswith("?"):
                # morphology lookup for each word after "?"
                for sWord in sText[1:].strip().split():
                    if sWord:
                        echo("* {}".format(sWord))
                        for sMorph in oDict.getMorph(sWord):
                            echo(" {:<32} {}".format(sMorph, oLexGraphe.formatTags(sMorph)))
            elif sText.startswith("/+"):
                gce.setOptions({ opt:True for opt in sText[2:].strip().split() if opt in gce.getOptions() })
            elif sText.startswith("/-"):
                gce.setOptions({ opt:False for opt in sText[2:].strip().split() if opt in gce.getOptions() })
            elif sText == "/debug" or sText == "/d":
                bDebug = not(bDebug)
                echo("debug mode on" if bDebug else "debug mode off")
            elif sText == "/help" or sText == "/h":
                echo(_HELP)
            elif sText == "/lopt" or sText == "/l":
                echo("\n".join( [ k+":\t"+str(v) for k, v in sorted(gce.getOptions().items()) ] ))
            elif sText == "/quit" or sText == "/q":
                break
            elif sText.startswith("/rl"):
                # reload (todo)
                pass
            else:
                for sParagraph in txt.getParagraph(sText):
                    # NOTE(review): this loop formats and checks sText, not the loop
                    # variable sParagraph — looks like a bug for multi-paragraph
                    # input; confirm against upstream before changing
                    if xArgs.textformatter:
                        sText = oTF.formatText(sText)
                    sRes = generateText(0, sText, oTokenizer, oDict, xArgs.json, nWidth=xArgs.width, bDebug=bDebug, bEmptyIfNoErrors=True)
                    if sRes:
                        echo("\n" + sRes)
                    else:
                        echo("\nNo error found.")
            sText = _getText(sInputText)
aSpellErrs.append(dToken) if bEmptyIfNoErrors and not aGrammErrs and not aSpellErrs: return "" return " " + json.dumps( { "iParagraph": iParagraph, "lGrammarErrors": aGrammErrs, "lSpellingErrors": aSpellErrs }, ensure_ascii=False) if __name__ == '__main__': gce.load() echo("Grammalecte v{}".format(gce.version)) dServerOptions = getServerOptions() dGCOptions = getConfigOptions("fr") if dGCOptions: gce.setOptions(dGCOptions) dServerGCOptions = gce.getOptions() echo("Grammar options:\n" + " | ".join( [k + ": " + str(v) for k, v in sorted(dServerGCOptions.items())])) oDict = gce.getDictionary() oTokenizer = tkz.Tokenizer("fr") oTF = tf.TextFormatter() dUser = {} userGenerator = genUserId() app = Bottle()