def parse_lexical_symbols(self):
    """Given that the token generator is at the beginning of the lexical
    symbol specifications, read a series of lexical symbol specifications,
    doing name and basic type analysis on the fly.

    Each specification has the shape  NAME : $REGEX .  and the whole group
    is wrapped in '{' ... '}'.  Defined symbols are entered into
    self.GlobalSymbolDict keyed by their name.

    Raises Exception on a redefined symbol, a nested '{', or an
    unterminated specification at the closing '}'.
    """
    stack = []  # tokens of the specification currently being collected
    # The group must open with '{'.
    self.tokenizer.next().must_be('{')
    for token in self.tokenizer:
        stack += [ token ]
        if token.text == ".":
            # '.' terminates one specification: expect NAME ':' $REGEX
            stack[0].assert_symbol_name()
            stack[1].must_be(':')
            # Regex tokens are marked by a leading '$'.
            stack[2].must_match('^\\$', "regular expression")
            ## Name analysis
            if stack[0].text in self.GlobalSymbolDict:
                # Duplicate definition: report both locations.
                originalDef = self.GlobalSymbolDict[stack[0].text].defining_token
                raise Exception("Symbol %s redefined at %d,%d. Originally at %d,%d" % (stack[0].text, stack[0].line, stack[0].col, \
                    originalDef.line, originalDef.col))
            s = Symbol(stack[0])
            s.is_gla = True
            # Strip the leading '$' before storing the regex.
            s.regex = Set(stack[2].text[1:])
            self.GlobalSymbolDict[stack[0].text] = s
            stack = []  # ready for the next specification
        elif token.text == "{":
            # Nested '{' is never legal inside the lexical section.
            raise Exception("Unexpected %s" % token)
        elif token.text == "}":
            # End of the lexical section; anything still on the stack
            # is a specification that never reached its terminating '.'.
            if len(stack) > 1:
                raise Exception("Unfinished lexical specification beginning with %s" % stack[0])
            return
        else:
            pass  # ordinary token: keep accumulating
def create_root_symbols(self):
    """Insert magical symbols above the root of the grammar in order to
    match the beginning and end of the sample."""
    def make_sentinel(name):
        # Build a literal symbol whose regex is its own spelling.
        sentinel = Symbol(Token(None, None, name))
        sentinel.regex = Set(name)
        sentinel.is_lit = True
        sentinel.GlobalSymbolDict = self.GlobalSymbolDict
        return sentinel

    start_doc = make_sentinel('%^')   # matches the start of the sample
    end_doc = make_sentinel('%$')     # matches the end of the sample

    root = Symbol(Token(None, None, 'R00t.Symbol'))
    root.GlobalSymbolDict = self.GlobalSymbolDict
    # R00t.Symbol -> %^ <real roots> %$
    root.productions = [Production(root, [start_doc] + self.get_roots() + [end_doc])]

    self.GlobalSymbolDict['R00t.Symbol'] = root #XXX this is a nasty hack
    self.GlobalSymbolDict['%^'] = start_doc
    self.GlobalSymbolDict['%$'] = end_doc
def create_symbols(lines):
    """Populate the module-level `symbols` list from rule text.

    Two passes over the rules produced by make_rules: the first declares
    a Symbol per rule name (so forward references resolve), the second
    attaches either a regex or a list of productions to each one.
    """
    global symbols
    rules = make_rules(lines)

    # Pass 1: declare every symbol, stripping the rule-name delimiters.
    for key in rules:
        symbols.append(Symbol(key[1:-1]))

    # Pass 2: fill in each symbol's definition.
    for key in rules:
        body = rules[key]
        sym = get_symbol(key[1:-1])
        if "regex" in body:
            sym.is_regex = True
            sym.regex = extract_regex(body)
        else:
            sym.prods = make_prods(body)
def promote_productions(self):
    """Convert all the elements of productions from tokens into symbols,
    meanwhile checking that all of the elements are existing symbols.
    This is name analysis in action: because symbol names have Algol
    scoping inside the concrete grammar portion of the input file, we wait
    until the whole shebang is parsed before attempting to promote tokens
    into symbols.

    Raises Exception when a production references a name that is not a
    defined symbol (literals are exempt: they are synthesized on the fly
    and registered in self.GlobalSymbolDict).
    """
    # Hoisted out of the triple-nested loop: compile the literal marker once.
    literal_pattern = re.compile("^'")
    # Snapshot the values: the loop body inserts literal symbols into
    # self.GlobalSymbolDict, and iterating a live view while mutating the
    # dict is only safe in Python 2 (where .values() is already a list).
    for sym in list(self.GlobalSymbolDict.values()):
        for production in sym.productions:
            elements = production.elements
            if len(elements) == 0:
                continue  # An empty production has no tokens to promote
            firstToken = elements[0]  # used to locate errors in messages
            for i in range(0, len(elements)):
                if literal_pattern.match(elements[i].text):
                    # Literal element: no name analysis needed — synthesize
                    # a symbol whose regex matches the quoted text verbatim.
                    lit = Symbol(elements[i])
                    lit.is_lit = True
                    lit.regex = Set(re.escape(lit.defining_token.text[1:-1]))
                    self.GlobalSymbolDict[lit.defining_token.text] = lit
                    elements[i] = lit
                else:
                    # Name analysis: the element must be a defined symbol.
                    try:
                        elements[i] = self.GlobalSymbolDict[elements[i].text]
                    except KeyError:
                        raise Exception("Production for %s beginning at %d,%d: %s is not a symbol." % \
                            (sym.defining_token.text, firstToken.line, firstToken.col, elements[i].text))