def get_tokens(self):
    """
    Build a TokenSpec for every `%token` class listed in `self.named_objs`.

    Each entry of `self.named_objs` is a `(module, key, obj, dirtoks)`
    tuple, where `dirtoks` is the tokenized directive.  After the
    leading "%token", the remaining directive tokens may rename the token
    and, as the very last item, give its precedence.  A token without an
    explicit precedence gets the precedence "none".

    The list is computed once and stored in `self._cache_tokens`; later
    calls return that same list.

    Raises SpecError if a precedence is followed by further tokens, or if
    a directive token is neither a precedence nor a valid name.
    """
    if self._cache_tokens is not None:
        return self._cache_tokens

    result = []
    for module, k, v, dirtoks in self.named_objs:
        if issubclass(v, Token) and dirtoks[0] in ["%token"]:
            name = k
            prec = None
            last_index = len(dirtoks) - 1
            for i in range(1, len(dirtoks)):
                tok = dirtoks[i]
                m = NontermSpec.precedence_tok_re.match(tok)
                if m:
                    # A precedence is only legal as the final item.
                    if i < last_index:
                        raise SpecError(
                            "Precedence must come last in token "
                            "specification: %s" % v.__doc__)
                    prec = m.group(1)
                else:
                    m = NontermSpec.token_re.match(tok)
                    if m:
                        name = m.group(1)
                    else:
                        raise SpecError(
                            "Invalid token specification: %s" % v.__doc__)
            if prec is None:
                prec = "none"
            result.append(TokenSpec(name, v, prec))

    # Remember the result so the cache check above can actually hit.
    self._cache_tokens = result
    return result
def compile_start(self, lst):
    """
    Validate a `%start` directive (the mandatory start-symbol marker).

    `lst` is the tokenized directive.  At most one argument is accepted,
    and when it is present it has to equal `self.name`.

    Raises SpecError for surplus arguments or a mismatching symbol name.
    """
    arg_count = len(lst)
    if arg_count > 2:
        raise SpecError(
            "%start directive with extra stuff: {}".format(lst))
    if arg_count == 2:
        declared = lst[1]
        if declared != self.name:
            raise SpecError(
                "%start directive uses symbol {}, should be {}".format(
                    declared, self.name))
def compile_list(self, lst):
    """
    Handle `%list item sep`: generate the reduce methods for a list of
    `item`s separated by `sep`s.

    When `sep` starts with a quote (a keyword or literal symbol) the
    separators are dropped from the resulting list; any other separator
    is kept in the list between the items.

    Raises SpecError unless exactly two arguments follow the directive.
    """
    if len(lst) != 3:
        raise SpecError(
            "%list needs item and sep arguments, got {}".format(lst[1:]))

    item_sym = lst[1]
    sep_sym = lst[2]

    # Only the final statement of reduce_multiple depends on the separator.
    if sep_sym.startswith("'"):
        extend_stmt = ' return lst + [item]'
    else:
        extend_stmt = ' return lst + [sep, item]'

    self.add_method([
        'def reduce_single(self, item):',
        ' "%%reduce %s"' % (item_sym, ),
        ' return [item]',
        'def reduce_multiple(self, lst, sep, item):',
        ' "%%reduce %s %s %s"' % (self.name, sep_sym, item_sym),
        extend_stmt,
    ])
def compile_choice(self, lst):
    """
    Handle the `%choice` shorthand: for every symbol after the directive,
    generate a reduce method with that single symbol on the right hand
    side, returning the symbol's value unchanged as the AST for this node.

    The generated method name depends on the symbol's last character:
    a plain symbol gives `r_<snake_name>`, a symbol ending in `?`, `*` or
    `+` gives `r_<snake_name><postfix>`, and a quoted literal gives
    `r_const_<hex hash>`.

    Raises SpecError if a symbol does not match `NontermSpec.token_re`.
    """
    for name in lst[1:]:
        last_char = name[-1]
        if last_char == "'":
            arg_name = '_'
            # hash() may be negative and hex() would then produce "-0x...",
            # which is not usable inside a function name; masking keeps the
            # value non-negative so the generated `def` stays valid.
            fn_name = 'r_const_' + hex(hash(name) & 0xffffffffffffffff)
        elif last_char in '?*+':
            arg_name = snake_case(name[:-1])
            fn_name = 'r_' + arg_name + postfix[last_char]
        else:
            arg_name = snake_case(name)
            fn_name = 'r_' + arg_name
        if not NontermSpec.token_re.match(name):
            raise SpecError("%s is not a valid RHS symbol" % (name, ))
        fillers = {'fn_name': fn_name, 'arg_name': arg_name, 'name': name}
        fn_src = [
            x % fillers for x in [
                'def %(fn_name)s(self, %(arg_name)s):',
                ' "%%reduce %(name)s"',
                ' return %(arg_name)s'
            ]
        ]
        self.add_method(fn_src)
def compile_nonterm(self, lst):
    """
    Validate a `%nonterm` directive, used for ordinary nonterminals
    (a leftover from module-based declarations).

    If the directive names a symbol, it has to equal `self.name`;
    otherwise SpecError is raised.  Nothing else is checked.
    """
    if len(lst) < 2:
        return
    declared = lst[1]
    if declared == self.name:
        return
    raise SpecError(
        "%nonterm directive uses symbol {}, should be {}".format(
            declared, self.name))
def compile_enum(self, lst):
    """
    Compile a %enum directive, which simply sets self.type to the name
    of the matched keyword (optionally with a suffix).

    As an example, `%enum:Foo 'bar' 'baz'` will recognize `bar` and
    `baz` keywords and set the `type` attribute to `BarFoo` and
    `BazFoo`, respectively.  Without an explicit `:suffix`, the camel-cased
    `self.name` is used as the suffix.

    One reduce method is generated per keyword.  Its name is built from
    the keyword itself when that is an identifier, from `symbol_names`
    when the keyword is listed there, and from a hex hash otherwise.

    Raises SpecError if an argument is not enclosed in single quotes or
    contains a quote or backslash.
    """
    if ':' in lst[0]:
        suffix = lst[0].split(':')[1]
    else:
        suffix = camel_case(self.name)
    for name in lst[1:]:
        if name[0] != "'" or name[-1] != "'":
            raise SpecError("%s must be a literal (enclosed in '')" %
                            (name, ))
        kwd_name = name[1:-1]
        if "'" in kwd_name or "\\" in kwd_name:
            raise SpecError("%s has disallowed characters" % (name, ))
        if identifier_re.match(kwd_name):
            val_name = kwd_name
        elif kwd_name in symbol_names:
            val_name = symbol_names[kwd_name]
            kwd_name = val_name
        else:
            # hash() may be negative and hex() would then produce "-0x...",
            # which cannot be part of the generated function name; masking
            # keeps the value non-negative.
            val_name = hex(hash(kwd_name) & 0xffffffffffffffff)
        fillers = {
            'val_name': val_name,
            'suffix': suffix,
            'kwd_name': camel_case(kwd_name),
            'escaped_name': name
        }
        fn_src = [
            x % fillers for x in [
                'def reduce_%(val_name)s(self, _x):',
                ' "%%reduce %(escaped_name)s"',
                " self.type = '%(kwd_name)s%(suffix)s'"
            ]
        ]
        self.add_method(fn_src)
def get_nonterminals(cls):
    """
    Turn every class registered in `cls._nonterms` into a nonterminal
    spec via `NontermSpec.from_class`.

    Returns a `(specs, start)` pair: the list of all specs, and the one
    flagged as start symbol (None when no class is flagged).

    Raises SpecError when a second start symbol is encountered.
    """
    specs = []
    start = None
    for key, nonterm_cls in iteritems(cls._nonterms):
        spec, is_start = NontermSpec.from_class(nonterm_cls, key)
        specs.append(spec)
        if not is_start:
            continue
        if start is not None:
            raise SpecError("Only one start non-terminal allowed: %s / %s"
                            % (nonterm_cls.__doc__, start))
        start = spec
    return specs, start
def get_precedences(self):
    """
    Build a Precedence for every precedence class in `self.named_objs`
    whose directive is one of %fail, %nonassoc, %left, %right or %split.

    After the directive, an optional name token (only allowed in first
    position) overrides the default name, and each relationship token
    adds one entry to the relationships mapping.

    The list is computed once, stored in `self._cache_precedences`, and
    returned from the cache afterwards.

    Raises SpecError on a duplicate relationship, a name that follows
    other tokens, or a token that is neither a relationship nor a name.
    """
    cached = self._cache_precedences
    if cached is not None:
        return cached

    assoc_directives = ("%fail", "%nonassoc", "%left", "%right", "%split")
    precedences = []
    for module, key, obj, dirtoks in self.named_objs:
        if not issubclass(obj, Precedence):
            continue
        directive = dirtoks[0]
        if directive not in assoc_directives:
            continue

        name = key
        relationships = {}
        for pos in range(1, len(dirtoks)):
            tok = dirtoks[pos]
            assoc = Precedence.assoc_tok_re.match(tok)
            if assoc:
                # Precedence relationship.
                target = assoc.group(2)
                if target in relationships:
                    raise SpecError(
                        "Duplicate precedence relationship: %s"
                        % obj.__doc__)
                relationships[target] = assoc.group(1)
                continue

            named = NontermSpec.token_re.match(tok)
            if not named:
                raise SpecError(
                    "Invalid precedence specification: %s" % obj.__doc__)
            if pos != 1:
                raise SpecError(
                    "Precedence name must come before relationships: %s"
                    % obj.__doc__)
            name = named.group(1)

        # The directive without its leading "%" is passed as second argument.
        precedences.append(Precedence(name, directive[1:], relationships))

    self._cache_precedences = precedences
    return precedences
def __init__(cls, name, bases, clsdict):
    """
    Initialize a newly created class and register it with its grammar.

    If the class body defines `__doc__`, the docstring is handed to
    `interpret_docstring`, which fills a dict of extra attributes; these
    are set on the class after the regular `type.__init__` ran.  The
    class is then stored under `name` in `cls._grammar_cls._nonterms`.

    Raises SpecError if `name` is already registered.
    """
    generated = {}
    if '__doc__' in clsdict:
        interpret_docstring(clsdict['__doc__'], generated, name)
    type.__init__(cls, name, bases, clsdict)

    grammar = cls._grammar_cls
    if name in grammar._nonterms:
        raise SpecError('duplicate Nonterm class %s' % (name, ))

    for attr, value in iteritems(generated):
        setattr(cls, attr, value)

    # the Nonterm base class is skipped: a class called 'Nonterm' is only
    # registered when its body holds at least one MethodType value
    has_methods = name != 'Nonterm' or any(
        isinstance(member, MethodType) for member in clsdict.values())
    if has_methods:
        grammar._nonterms[name] = cls
def get_nonterminals(self):
    """
    Build a nonterminal spec for every Nonterm subclass in
    `self.named_objs` declared with a %start or %nonterm directive.

    Returns a `(specs, start)` pair: the list of all specs, and the one
    flagged as start symbol (None when there is none).  The pair is
    stored in `self._cache_nonterminals` and served from there on later
    calls.

    Raises SpecError when more than one start symbol is found.
    """
    cached = self._cache_nonterminals
    if cached is not None:
        return cached

    specs = []
    start = None
    for module, key, obj, dirtoks in self.named_objs:
        if not issubclass(obj, Nonterm):
            continue
        if dirtoks[0] not in ["%start", "%nonterm"]:
            continue
        spec, is_start = NontermSpec.from_class(obj)
        specs.append(spec)
        if not is_start:
            continue
        # Start symbol.
        if start is not None:
            raise SpecError("Only one start non-terminal allowed: %s"
                            % obj.__doc__)
        start = spec

    self._cache_nonterminals = (specs, start)
    return specs, start