def regex(self, ast, *args):
    """Validate that *ast* compiles as a regular expression.

    The value is returned unchanged when ``re.compile`` accepts it.
    Extra positional arguments are accepted and ignored.

    Raises:
        FailedSemantics: if ``re.compile`` raises ``TypeError`` or
            ``re.error``; the message carries the underlying error text.
    """
    try:
        # Compiled only to validate; the compiled object is discarded.
        re.compile(ast)
    except (TypeError, re.error) as err:
        raise FailedSemantics('regexp error: ' + str(err))
    return ast
def symbol_or_pair(self, ast):
    """Translate a symbol or ``up:lo`` pair token into an expression string.

    ``ast.token`` is stripped and then interpreted in one of two ways:

    * With a colon (``up:lo``, either side may be empty): each non-empty
      side is checked against ``cfg.input_symbol_set`` /
      ``cfg.output_symbol_set``, and a complete pair against
      ``cfg.symbol_pair_set``.  The result is ``"up:lo"``,
      ``"[up .o. PI]"``, ``"[PI .o. lo]"`` or ``"PI"`` depending on which
      sides are present.  Braces in the upper side are prefixed with ``%``.
    * Without a colon (letters only): a name found in ``cfg.definitions``
      is returned as is, a symbol present in both the input and output
      sets becomes ``"sym:sym"``, and ``BEGIN`` / ``END`` are returned
      as is.

    Raises:
        FailedSemantics: if a symbol or pair is not in the alphabet, or the
            token fits neither form.  The message is also stored in
            ``cfg.error_message``.
    """
    text = ast.token.strip()

    # Adjacent raw literals concatenate to the exact original pattern text.
    pair_re = re.compile(
        r"""^ (?P<up>[a-zšžåäöüõA-ZÅÄÖ0-9]* | \{[a-zåäöüõA-ZÅÄÖØ]+\}) """
        r""": (?P<lo>[a-zšžåäöüõA-ZÅÄÖØ]*) $""",
        re.X)
    found = pair_re.match(text)

    if found is not None:
        # The token is a pair written with a colon.
        upper = found.group("up")
        lower = found.group("lo")
        # Braces of a {name} upper symbol are quoted with '%' in the output.
        upper_quoted = re.sub(r"([{}])", r"%\1", upper)

        problems = []
        if upper and upper not in cfg.input_symbol_set:
            problems.append("input symbol '{}'".format(upper))
        if lower and lower not in cfg.output_symbol_set:
            problems.append("output symbol '{}'".format(lower))
        if upper and lower and (upper, lower) not in cfg.symbol_pair_set:
            problems.append("symbol pair '{}'".format(text))
        if problems:
            cfg.error_message = " and ".join(problems) + " not in alphabet"
            raise FailedSemantics(cfg.error_message)

        if upper and lower:
            return "{}:{}".format(upper_quoted, lower)
        if upper:
            return "[{} .o. PI]".format(upper_quoted)
        if lower:
            return "[PI .o. {}]".format(lower)
        return "PI"

    if re.fullmatch(r"[a-zåäöüõA-ZÅÄÖØ]+", text) is not None:
        # Either a defined name or a plain surface character.
        if text in cfg.definitions:
            return "{}".format(text)
        if text in cfg.output_symbol_set and text in cfg.input_symbol_set:
            return "{}:{}".format(text, text)
        if text in {'BEGIN', 'END'}:
            return text

    cfg.error_message = "'" + text + "' is an invalid pair/definend symbol"
    raise FailedSemantics(cfg.error_message)
def symbol_or_pair(self, ast):
    """Translate a symbol or ``up:lo`` pair token into a transducer.

    ``ast.token`` is stripped and then interpreted in one of two ways:

    * With a colon (``up:lo``, either side may be empty): each non-empty
      side is checked against ``cfg.input_symbol_set`` /
      ``cfg.output_symbol_set``, and a complete pair against
      ``cfg.symbol_pair_set``.  A complete pair is compiled with
      ``hfst.regex``; a one-sided pair is composed with
      ``cfg.all_pairs_fst``; a bare ``:`` yields a copy of
      ``cfg.all_pairs_fst`` named ``"PI"``.
    * Without a colon: a name found in ``cfg.definitions`` yields a copy of
      that definition; a symbol present in both the input and output sets,
      or ``BEGIN`` / ``END``, is compiled with ``hfst.regex``.

    Except for the bare-colon case, the returned transducer is named after
    the stripped token.

    Raises:
        FailedSemantics: if a symbol or pair is not in the alphabet, or the
            token fits neither form.  The message is also stored in
            ``cfg.error_message``.
    """
    text = ast.token.strip()

    def named(fst):
        # Label the transducer with the stripped token and hand it back.
        fst.set_name(text)
        return fst

    # Adjacent raw literals concatenate to the exact original pattern text.
    pair_re = re.compile(
        r"""^ (?P<up>[a-zšžåäöüõA-ZÅÄÖ0-9'´`]* | \{[a-zåäöüõA-ZÅÄÖØ'´`]+\}) """
        r""": (?P<lo>[a-zšžåäöüõA-ZÅÄÖØ'´`]*) $""",
        re.X)
    found = pair_re.match(text)

    if found is not None:
        # The token is a pair written with a colon.
        upper = found.group("up")
        lower = found.group("lo")
        # Braces of a {name} upper symbol are quoted with '%' for hfst.regex.
        upper_quoted = re.sub(r"([{}])", r"%\1", upper)

        problems = []
        if upper and upper not in cfg.input_symbol_set:
            problems.append("input symbol '{}'".format(upper))
        if lower and lower not in cfg.output_symbol_set:
            problems.append("output symbol '{}'".format(lower))
        if upper and lower and (upper, lower) not in cfg.symbol_pair_set:
            problems.append("symbol pair '{}'".format(text))
        if problems:
            cfg.error_message = " and ".join(problems) + " not in alphabet"
            raise FailedSemantics(cfg.error_message)

        if upper and lower:
            # A fully specified, valid pair.
            return named(hfst.regex(upper_quoted + ':' + lower))
        if upper:
            # Only the upper side is given: compose it with the all-pairs
            # transducer.  The result of compose() is not used; the receiver
            # itself is what gets returned.
            upper_fst = hfst.regex(upper_quoted)
            upper_fst.compose(cfg.all_pairs_fst)
            return named(upper_fst)
        if lower:
            # Only the lower side is given: compose a copy of the all-pairs
            # transducer with it, leaving cfg.all_pairs_fst itself untouched.
            pairs_fst = cfg.all_pairs_fst.copy()
            lower_fst = hfst.regex(lower)
            pairs_fst.compose(lower_fst)
            return named(pairs_fst)
        # A bare colon: a copy of the all-pairs transducer, named "PI".
        any_pair_fst = cfg.all_pairs_fst.copy()
        any_pair_fst.set_name("PI")
        return any_pair_fst

    if re.fullmatch(r"[a-zåäöšžüõA-ZÅÄÖØ'´`]+", text) is not None:
        # Either a defined name or a plain surface character.
        if text in cfg.definitions:
            return named(cfg.definitions[text].copy())
        in_both_alphabets = (text in cfg.output_symbol_set
                             and text in cfg.input_symbol_set)
        if in_both_alphabets or text in {'BEGIN', 'END'}:
            return named(hfst.regex(text))

    cfg.error_message = "'" + text + "' is an invalid pair/definend symbol"
    raise FailedSemantics(cfg.error_message)
def known_name(self, name):
    """Return *name* if it is already a key/member of ``self.rules``.

    Raises:
        FailedSemantics: if *name* is not in ``self.rules``.
    """
    if name in self.rules:
        return name
    raise FailedSemantics('rule "%s" not yet defined' % str(name))
def new_name(self, name):
    """Return *name* if it is not yet present in ``self.rules``.

    Raises:
        FailedSemantics: if *name* is already in ``self.rules``.
    """
    if name not in self.rules:
        return name
    raise FailedSemantics('rule "%s" already defined' % str(name))
def token(self, ast, *args):
    """Wrap a non-empty token value in ``grammars.Token``.

    Extra positional arguments are accepted and ignored.

    Raises:
        FailedSemantics: if *ast* is falsy (for example an empty string).
    """
    if ast:
        return grammars.Token(ast)
    raise FailedSemantics('empty token')
def ul_marker(self, ast):
    """Check the marker against the current ``substate`` and return *ast*.

    When ``self._context.substate`` is ``None`` no check is made.
    Otherwise the context's tokenizer must match a run of ``"*"`` repeated
    ``substate`` times.

    Raises:
        FailedSemantics: if ``substate`` is set and the tokenizer does not
            match the expected run of asterisks.
    """
    context = self._context
    level = context.substate
    if level is None:
        return ast
    if context.tokenizer.match("*" * level):
        return ast
    raise FailedSemantics("not at correct level")
def error(self, msg):
    """Abort the current semantic action by raising ``FailedSemantics(msg)``.

    Raises:
        FailedSemantics: always, constructed from *msg*.
    """
    failure = FailedSemantics(msg)
    raise failure