def regex(self, ast, *args):
     """Check that ``ast`` compiles as a regular expression and return it.

     The compiled pattern object is discarded; only the original pattern
     value is handed back.  Extra positional arguments are accepted and
     ignored.

     Raises:
         FailedSemantics: if ``re.compile`` rejects the pattern.  The
             original exception is attached as the cause.
     """
     pattern = ast
     try:
         re.compile(pattern)
     except (TypeError, OverflowError, re.error) as e:
         # OverflowError is what CPython's regex parser raises for an
         # oversized repetition count (e.g. ``a{99999999999}``); report it
         # like any other invalid pattern instead of letting it escape.
         raise FailedSemantics('regexp error: ' + str(e)) from e
     return pattern
Example #2
0
 def symbol_or_pair(self, ast):
     """Turn a symbol, a ``up:lo`` pair or a definition name into a string.

     ``ast.token`` is stripped and then interpreted in one of two ways:

     * ``up:lo`` (either side may be empty): each non-empty side is looked
       up in ``cfg.input_symbol_set`` / ``cfg.output_symbol_set`` and a
       complete pair in ``cfg.symbol_pair_set``.  The result is
       ``"up:lo"``, ``"[up .o. PI]"``, ``"[PI .o. lo]"`` or ``"PI"``;
       braces in the upper side are prefixed with ``%``.
     * a bare name: returned unchanged when it is in ``cfg.definitions``
       or is ``BEGIN``/``END``, or as ``"name:name"`` when it is in both
       the output and the input symbol set.

     Raises:
         FailedSemantics: when a lookup fails or the token fits neither
             form; the message is also stored in ``cfg.error_message``.
     """
     text = ast.token.strip()
     pair_re = re.compile(
         r"""^
     (?P<up>[a-zšžåäöüõA-ZÅÄÖ0-9]*
      |
      \{[a-zåäöüõA-ZÅÄÖØ]+\})
     :
     (?P<lo>[a-zšžåäöüõA-ZÅÄÖØ]*)
     $""", re.X)
     pair = pair_re.match(text)
     if pair is not None:  # the token contains a colon
         upper, lower = pair.group("up", "lo")
         problems = []
         if upper and upper not in cfg.input_symbol_set:
             problems.append("input symbol '{}'".format(upper))
         if lower and lower not in cfg.output_symbol_set:
             problems.append("output symbol '{}'".format(lower))
         if upper and lower and (upper, lower) not in cfg.symbol_pair_set:
             problems.append("symbol pair '{}'".format(text))
         if problems:
             cfg.error_message = " and ".join(problems) + " not in alphabet"
             raise FailedSemantics(cfg.error_message)
         # Braces in the upper side are escaped with a percent sign.
         escaped_upper = re.sub(r"([{}])", r"%\1", upper)
         if upper and lower:
             return "{}:{}".format(escaped_upper, lower)
         if upper:
             return "[{} .o. PI]".format(escaped_upper)
         if lower:
             return "[PI .o. {}]".format(lower)
         return "PI"
     if re.fullmatch(r"[a-zåäöüõA-ZÅÄÖØ]+", text):
         # A bare name: a definition, an identity pair or a boundary name.
         if text in cfg.definitions:
             return text
         in_both_sets = (text in cfg.output_symbol_set
                         and text in cfg.input_symbol_set)
         if in_both_sets:
             return "{}:{}".format(text, text)
         if text in ('BEGIN', 'END'):
             return text
     cfg.error_message = "'" + text + "' is an invalid pair/definend symbol"
     raise FailedSemantics(cfg.error_message)
Example #3
0
 def symbol_or_pair(self, ast):
     """Turn a symbol, a ``up:lo`` pair or a definition name into an FST.

     ``ast.token`` is stripped and then interpreted in one of two ways:

     * ``up:lo`` (either side may be empty): each non-empty side is looked
       up in ``cfg.input_symbol_set`` / ``cfg.output_symbol_set`` and a
       complete pair in ``cfg.symbol_pair_set``.  A full pair is compiled
       with ``hfst.regex``; a one-sided pair is composed with
       ``cfg.all_pairs_fst``; a lone colon yields a copy of
       ``cfg.all_pairs_fst`` named ``"PI"``.
     * a bare name: a copy of ``cfg.definitions[name]``, or
       ``hfst.regex(name)`` when the name is in both symbol sets or is
       ``BEGIN``/``END``.

     Except for the lone colon, the returned FST is named after the token.

     Raises:
         FailedSemantics: when a lookup fails or the token fits neither
             form; the message is also stored in ``cfg.error_message``.
     """
     text = ast.token.strip()
     pair_re = re.compile(
         r"""^
     (?P<up>[a-zšžåäöüõA-ZÅÄÖ0-9'´`]*
      |
      \{[a-zåäöüõA-ZÅÄÖØ'´`]+\})
     :
     (?P<lo>[a-zšžåäöüõA-ZÅÄÖØ'´`]*)
     $""", re.X)
     pair = pair_re.match(text)
     if pair is not None:  # the token contains a colon
         upper, lower = pair.group("up", "lo")
         problems = []
         if upper and upper not in cfg.input_symbol_set:
             problems.append("input symbol '{}'".format(upper))
         if lower and lower not in cfg.output_symbol_set:
             problems.append("output symbol '{}'".format(lower))
         if upper and lower and (upper, lower) not in cfg.symbol_pair_set:
             problems.append("symbol pair '{}'".format(text))
         if problems:
             cfg.error_message = " and ".join(problems) + " not in alphabet"
             raise FailedSemantics(cfg.error_message)
         # Braces in the upper side are escaped with a percent sign.
         escaped_upper = re.sub(r"([{}])", r"%\1", upper)
         fst_name = text
         if upper and lower:
             fst = hfst.regex(escaped_upper + ':' + lower)
         elif upper:
             fst = hfst.regex(escaped_upper)
             fst.compose(cfg.all_pairs_fst)
         elif lower:
             fst = cfg.all_pairs_fst.copy()
             fst.compose(hfst.regex(lower))
         else:
             # A lone colon: every pair, under the fixed name "PI".
             fst = cfg.all_pairs_fst.copy()
             fst_name = "PI"
         fst.set_name(fst_name)
         return fst
     if re.fullmatch(r"[a-zåäöšžüõA-ZÅÄÖØ'´`]+", text):
         # A bare name: a definition, a symbol in both sets or a boundary.
         is_definition = text in cfg.definitions
         if (is_definition
                 or (text in cfg.output_symbol_set
                     and text in cfg.input_symbol_set)
                 or text in ('BEGIN', 'END')):
             if is_definition:
                 fst = cfg.definitions[text].copy()
             else:
                 fst = hfst.regex(text)
             fst.set_name(text)
             return fst
     cfg.error_message = "'" + text + "' is an invalid pair/definend symbol"
     raise FailedSemantics(cfg.error_message)
 def known_name(self, name):
     """Return ``name`` if it is already present in ``self.rules``.

     Raises:
         FailedSemantics: if ``name`` is not in ``self.rules``.
     """
     if name in self.rules:
         return name
     raise FailedSemantics('rule "%s" not yet defined' % str(name))
 def new_name(self, name):
     """Return ``name`` if it is not yet present in ``self.rules``.

     Raises:
         FailedSemantics: if ``name`` is already in ``self.rules``.
     """
     already_defined = name in self.rules
     if not already_defined:
         return name
     raise FailedSemantics('rule "%s" already defined' % str(name))
 def token(self, ast, *args):
     """Wrap a non-empty ``ast`` in ``grammars.Token``.

     Extra positional arguments are accepted and ignored.

     Raises:
         FailedSemantics: if ``ast`` is falsy (for example an empty string).
     """
     text = ast
     if text:
         return grammars.Token(text)
     raise FailedSemantics('empty token')
Example #7
0
 def ul_marker(self, ast):
     """Return ``ast`` unless the context's level check fails.

     When ``self._context.substate`` is ``None`` no check is made.
     Otherwise ``self._context.tokenizer.match`` is called with a string of
     ``substate`` asterisks and must return a truthy value.

     Raises:
         FailedSemantics: if ``substate`` is set and the tokenizer does not
             match the asterisk run.
     """
     state = self._context
     if state.substate is None:
         return ast
     expected_marker = "*" * state.substate
     if state.tokenizer.match(expected_marker):
         return ast
     raise FailedSemantics("not at correct level")
Example #8
0
 def error(self, msg):
     """Always raise ``FailedSemantics`` carrying ``msg``."""
     failure = FailedSemantics(msg)
     raise failure