def parse_lexical_symbols(self):
    """Read a series of lexical symbol specifications.

    Assumes the token generator is positioned at the opening '{' of the
    lexical-symbol section.  Each specification has the shape

        name : $regex .

    Performs name analysis on the fly: a lexical symbol may be defined only
    once, and each accepted definition is stored in ``self.GlobalSymbolDict``
    as a ``Symbol`` with ``is_gla`` set.

    Raises:
        Exception: on a redefined symbol, a nested '{', a malformed or
            unfinished specification.
    """
    stack = []
    self.tokenizer.next().must_be('{')
    for token in self.tokenizer:
        stack += [ token ]  # accumulate tokens until the '.' terminator
        if token.text == ".":
            # A complete specification is exactly: name ':' $regex '.'
            # Guard first so malformed input yields a diagnostic rather
            # than an IndexError on the positional checks below.
            if len(stack) < 4:
                raise Exception("Malformed lexical specification ending at %d,%d" % (token.line, token.col))
            stack[0].assert_symbol_name()
            stack[1].must_be(':')
            stack[2].must_match(r'^\$', "regular expression")
            ## Name analysis: lexical symbols may not be redefined.
            if stack[0].text in self.GlobalSymbolDict:
                originalDef = self.GlobalSymbolDict[stack[0].text].defining_token
                raise Exception("Symbol %s redefined at %d,%d. Originally at %d,%d" % (stack[0].text, stack[0].line, stack[0].col, \
                                                                                      originalDef.line, originalDef.col))
            s = Symbol(stack[0])
            s.is_gla = True
            # text[1:] strips the leading '$' sentinel (guaranteed present by
            # must_match above).  NOTE(review): Set() over that string — looks
            # like it yields the regex's character set; confirm intended.
            s.regex = Set(stack[2].text[1:])
            self.GlobalSymbolDict[stack[0].text] = s
            stack = []
        elif token.text == "{":
            raise Exception("Unexpected %s" % token)
        elif token.text == "}":
            # End of the section: anything still on the stack besides this
            # '}' is an unterminated specification.
            if len(stack) > 1:
                raise Exception("Unfinished lexical specification beginning with %s" % stack[0])
            return
        else:
            pass  # ordinary token; keep accumulating
def parse_cst(self):
    """Read the concrete-grammar rules.

    Assumes the token generator is positioned at the opening '{' of the
    concrete grammar.  Each rule has the shape

        lhs : element* .

    After this routine completes, each symbol in ``self.GlobalSymbolDict``
    has a list of productions containing Tokens, not Symbols; the
    token-to-symbol conversion happens later in ``promote_productions``.

    Raises:
        Exception: on redefinition of a lexical symbol, a nested '{', a
            malformed or unfinished rule.
    """
    stack = []
    self.tokenizer.next().must_be('{')
    for token in self.tokenizer:
        stack += [ token ]  # accumulate tokens until the '.' terminator
        if token.text == ".":
            # A minimal rule is: lhs ':' '.' (empty production), i.e. at
            # least three tokens.  Guard first so malformed input yields a
            # diagnostic rather than an IndexError below.
            if len(stack) < 3:
                raise Exception("Malformed grammar rule ending at %d,%d" % (token.line, token.col))
            stack[1].must_be(':')
            ## Name analysis
            stack[0].assert_symbol_name()
            production_elements = stack[2:-1]  # everything between ':' and '.'
            for element in production_elements:
                element.assert_symbol_name()
            if stack[0].text in self.GlobalSymbolDict:
                # Either an illegal redefinition of a lexical symbol, or an
                # additional production for an existing nonterminal.
                existingSymbol = self.GlobalSymbolDict[stack[0].text]
                if existingSymbol.is_gla:
                    raise Exception("Lexical Symbol %s redefined at %d,%d. Originally at %d,%d" % \
                                    (stack[0].text, stack[0].line, stack[0].col, \
                                     existingSymbol.defining_token.line, existingSymbol.defining_token.col))
                existingSymbol.productions += [Production(existingSymbol,production_elements)]
            else:
                # Brand new symbol occurrence
                s = Symbol(stack[0])
                s.is_gla = False
                s.productions = [Production(s,production_elements)]
                self.GlobalSymbolDict[stack[0].text] = s
            stack = []
        elif token.text == "{":
            raise Exception("Unexpected %s" % token)
        elif token.text == "}":
            # End of the section: anything still on the stack besides this
            # '}' is an unterminated rule.  (Message fixed: this is the
            # grammar parser, not the lexical-specification parser.)
            if len(stack) > 1:
                raise Exception("Unfinished grammar rule beginning with %s" % stack[0])
            return
        else:
            pass  # ordinary token; keep accumulating