def create_root_symbols(self):
    """Wrap the grammar roots in a synthetic root symbol.

    Three symbols are created and registered in the GlobalSymbolDict:
    'R00t.Symbol', whose single production is the start-of-document
    literal, followed by everything get_roots() returns, followed by the
    end-of-document literal; and the two literals themselves, '%^' and
    '%$', which exist so the beginning and end of the sample can be
    matched.
    """
    def make_doc_literal(text):
        # Literal symbol whose regex is the Set built from its own text.
        literal = Symbol(Token(None, None, text))
        literal.regex = Set(text)
        literal.is_lit = True
        literal.GlobalSymbolDict = self.GlobalSymbolDict
        return literal

    root = Symbol(Token(None, None, 'R00t.Symbol'))
    root.GlobalSymbolDict = self.GlobalSymbolDict
    doc_start = make_doc_literal('%^')
    doc_end = make_doc_literal('%$')

    # get_roots() is evaluated before any of the new symbols are
    # registered, exactly as in the sequence below.
    wrapped_roots = [doc_start] + self.get_roots() + [doc_end]
    root.productions = [Production(root, wrapped_roots)]

    # XXX this is a nasty hack
    registrations = (
        ('R00t.Symbol', root),
        ('%^', doc_start),
        ('%$', doc_end),
    )
    for name, symbol in registrations:
        self.GlobalSymbolDict[name] = symbol
def parse_cst(self):
    """Read the concrete-grammar rules from the token stream.

    The token generator must be positioned at the start of the concrete
    grammar: the first token consumed is checked with must_be('{').
    Tokens are then accumulated until a '.' arrives, at which point the
    accumulated tokens are treated as one rule of the form
    ``name : element ... .``.  Reading stops at '}'.

    After this routine completes, each symbol in the GlobalSymbolDict has
    a set of productions that contain Tokens, not symbols.  Conversion
    from tokens to symbols happens in promote_productions.

    If the token stream runs out before a '}' is seen, the method simply
    returns; any partially accumulated rule is dropped.

    Raises:
        Exception: for a '.' with no rule name in front of it, for a '{'
            inside the section, for a new production on an existing
            symbol whose is_gla flag is set, and for a '}' that arrives
            while a rule is still being accumulated.  must_be() and
            assert_symbol_name() may raise errors of their own.
    """
    stack = []
    self.tokenizer.next().must_be('{')
    for token in self.tokenizer:
        stack.append(token)  # Build a stack to process
        if token.text == ".":
            # We've got a rule to process.  A lone '.' leaves only one
            # token on the stack; report it instead of letting stack[1]
            # fail with a bare IndexError.
            if len(stack) < 2:
                raise Exception("Unexpected %s" % token)

            # Start by determining correct syntax.
            name_token = stack[0]
            stack[1].must_be(':')

            # Name analysis
            name_token.assert_symbol_name()
            production_elements = stack[2:-1]  # between ':' and '.'
            for element in production_elements:
                element.assert_symbol_name()

            if name_token.text in self.GlobalSymbolDict:
                # Redefined lexical sym or add a new production?
                existing = self.GlobalSymbolDict[name_token.text]
                if existing.is_gla:
                    raise Exception(
                        "Lexical Symbol %s redefined at %d,%d. Originally at %d,%d"
                        % (name_token.text, name_token.line, name_token.col,
                           existing.defining_token.line,
                           existing.defining_token.col))
                existing.productions += [
                    Production(existing, production_elements)]
            else:
                # Brand new symbol occurrence
                symbol = Symbol(name_token)
                symbol.is_gla = False
                symbol.productions = [Production(symbol, production_elements)]
                self.GlobalSymbolDict[name_token.text] = symbol
            stack = []
        elif token.text == "{":
            raise Exception("Unexpected %s" % token)
        elif token.text == "}":
            # The '}' itself is on the stack, so more than one entry means
            # a rule was started but never terminated with '.'.
            # NOTE(review): the message says "lexical specification" even
            # though this method reads the concrete grammar; confirm the
            # wording is intended.
            if len(stack) > 1:
                raise Exception("Unfinished lexical specification beginning with %s" % stack[0])
            return