def compile(self, rule):
    log.debug('compiling rule %s => %s' % (rule.pattern, rule.callback))
    pattern = ''
    i = 0
    while i < len(rule.pattern):
        c = rule.pattern[i]
        if c != '_':
            # literal character: copy it through to the regex source
            pattern += c
            i += 1
            continue
        # '_' starts a token type reference like '_name_'; find its end
        beg = i
        end = i
        for j in range(beg + 1, len(rule.pattern)):
            if rule.pattern[j] in ['_', ' ']:
                end = j
                break
        if end == beg or rule.pattern[end] != '_':
            raise RuntimeError('unterminated token in %s' % rule.pattern)
        tkn_type = rule.pattern[beg:end + 1]
        tkn_id = self.lookup_id(tkn_type)
        if tkn_id is None:
            raise RuntimeError('unknown token type %s' % tkn_type)
        # replace the token reference with a group matching its numeric id
        pattern += '(' + str(tkn_id) + ')'
        i = end + 1
    log.debug('%s => %s' % (rule.pattern, pattern))
    rule.re = re.compile(pattern)

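# A hedged sketch of the transformation compile() performs: assuming
# lookup_id() maps '_name_' to 3 and '_value_' to 5 (both ids are
# hypothetical), a rule pattern such as
#
#     'set _name_ _value_'
#
# becomes the regex source
#
#     'set (3) (5)'
#
# which match_rule() below applies to the translated token string of a
# block.
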
def tokenize(self):
    while self.files:
        src = self.files[-1]
        for tkn in src:
            if tkn.str == '@modules':
                self.load_modules(src)
                break
            elif tkn.str == '@include':
                path = next(src).str
                self.include_file(path, tkn.level, src.path)
                break
            else:
                log.debug('+ token %s@%d' % (tkn.str, tkn.level))
                self.tokenq.append(tkn)
        else:
            # for/else: the file was exhausted without hitting a directive,
            # so drop it and resume the file that included it (if any)
            self.files.pop(-1)

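# For illustration (hypothetical input): a '@include common.conf' directive
# presumably makes include_file() push common.conf onto self.files, and
# because tokenize() always reads from self.files[-1], the included file is
# drained before the including file resumes -- depth-first include
# semantics.
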
def enumerate_tokens(self):
    next_id = 0
    what = 'keyword'
    for tknsets in [self.keywords.values(), self.tokens.values()]:
        for tknset in tknsets:
            for tkn in tknset:
                known = self.lookup_id(tkn.type)
                if known is None:
                    # first time we see this type: assign the next free id
                    tkn.id = next_id
                    self.idtbl[tkn.type] = tkn.id
                    self.typetbl[tkn.id] = tkn.type
                    next_id += 1
                    log.debug('%s %s => #%d' % (what, tkn.type, tkn.id))
                else:
                    # the type already has an id: reuse it
                    tkn.id = known
        what = 'token'

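# Rough picture of the resulting tables (ids are illustrative only):
# keyword types are numbered before token types, and a type that appears
# in several contexts keeps the id it was given first, e.g.
#
#     idtbl   = {'_set_': 0, '_name_': 1, '_token_': 2, ...}
#     typetbl = {0: '_set_', 1: '_name_', 2: '_token_', ...}
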
def classify_keywords(self, tokens):
    for tkn in tokens:
        if tkn.type is not None:
            continue
        for kw in self.active_keywords[-1]:
            if tkn.str == kw.match:
                tkn.type = kw.type
                break
        if tkn.type is not None:
            continue
        for kw in self.active_keywords[0]:
            if tkn.str == kw.match:
                tkn.type = kw.type
                break
        if tkn.type is not None:
            log.debug('token %s: keyword %s' % (tkn.str, tkn.type))

def classify_tokens(self, tokens):
    for tkn in tokens:
        if tkn.type is not None:
            continue
        # innermost context first, then the global one (if distinct)
        contexts = [self.active_tokens[-1], self.active_keywords[-1]]
        if len(self.active_tokens) > 1:
            contexts.append(self.active_tokens[0])
            contexts.append(self.active_keywords[0])
        for ctx in contexts:
            for tkndef in ctx:
                tkndef.classify(tkn)
        # anything still unclassified falls back to the generic type
        if tkn.type is None:
            tkn.type = '_token_'
        if tkn.type == '_comma_':
            tkn.type = ','
        if tkn.type == '_dash_':
            tkn.type = '-'
        log.debug('token %s: token %s' % (tkn.str, tkn.type))

def classify(self, tkn):
    if tkn.type is not None:
        return True
    if isinstance(self.match, str):
        log.debug('testing token %s with %s' % (tkn.str, self.match))
        if tkn.str == self.match:
            tkn.type = self.type
    elif isinstance(self.match, Lexer.regexp_type):
        log.debug('testing token %s with %s' % (tkn.str, self.match.pattern))
        m = self.match.match(tkn.str)
        # the regexp must cover the whole token, not just a prefix
        if m is not None and m.group(0) == tkn.str:
            tkn.type = self.type
    elif callable(self.match):
        # a callable matcher sets tkn.type itself if it recognizes the token
        self.match(tkn)
    return tkn.type is not None

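# classify() accepts three kinds of matcher, sketched here with made-up
# token definitions (the TokenDef constructor shown is hypothetical):
#
#     TokenDef(match='via',              type='_via_')     # literal string
#     TokenDef(match=re.compile(r'\d+'), type='_number_')  # regexp, must
#                                                          # cover the whole
#                                                          # token string
#     TokenDef(match=my_classifier_fn,   type=None)        # callable that
#                                                          # sets tkn.type
#                                                          # itself
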
def match_rule(self, rules, tknstr):
    longest = 0
    rule = None
    match = None
    for r in rules:
        log.debug('matching "%s" against "%s"' % (tknstr, r.re.pattern))
        m = r.re.match(tknstr)
        if m is not None:
            log.debug(' => match (%s)' % m.group(0))
            mlen = len(m.group(0))
            # longest match wins; on a tie the earlier rule is kept
            if mlen > longest:
                longest = mlen
                rule = r
                match = m
        else:
            log.debug(' => mismatch')
    return rule, match.group(0) if match else None

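# Longest-match-wins, sketched with hypothetical rules: against the id
# string '3 5 5', a rule compiled to '(3) (5)' matches the prefix '3 5'
# while one compiled to '(3) (5) (5)' matches all of '3 5 5', so
# match_rule() returns the second rule; among equally long matches the
# first rule in the list wins.
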
def pop_context(self):
    log.debug('pop_context')
    self.active_keywords.pop(-1)
    self.active_tokens.pop(-1)

def push_context(self, name):
    log.debug('push_context %s' % name)
    kl = self.keywords.get(name, [])
    tl = self.tokens.get(name, [])
    self.active_keywords.append(kl)
    self.active_tokens.append(tl)

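# A small usage sketch (node and token names are hypothetical): while
# parsing a block such as
#
#     interface eth0
#         address 10.0.0.1
#
# parse_node() calls push_context('interface'), so active_keywords[-1] /
# active_tokens[-1] hold the interface-specific definitions while index 0
# keeps the global ones; pop_context() restores the outer scope when the
# block ends.
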
def parse_node(self, node_tkn, parent):
    node_name = node_tkn.str
    log.debug('parsing node %s...' % node_name)
    if node_name not in self.nodes:
        if not self.demand_load(node_name):
            raise RuntimeError('%s:%d: unknown node type %s' %
                               (self.where(node_tkn) + (node_name, )))
    self.push_context(node_name)
    nodedef = self.nodes[node_name]
    extra = self.pull_tokens(node_tkn.level, nodedef.extra_tokens)
    root = self.root
    node = nodedef.type(nodedef, root, parent, node_tkn, *extra)
    tokens = self.pull_tokens(node_tkn.level)
    log.debug('%s block: %s' % (node_name, ' '.join(x.str for x in tokens)))
    while tokens:
        xlated = self.translate_tokens(tokens)
        tknstr = ' '.join(x.type for x in tokens)
        xltstr = re.sub(r' , ', ', ', ' '.join(str(x) for x in xlated))
        log.debug('%s xlated to %s' % (tknstr, xltstr))
        rule, match = self.match_rule(self.rules[node_name], xltstr)
        if rule is None:
            if tokens[0].str in self.nodes or \
               self.demand_load(tokens[0].str):
                # the first token names a nested node: parse it recursively
                self.pushback_tokens(tokens[1:])
                c_tkn = tokens[0]
                c = self.parse_node(c_tkn, node)
                if c is None:
                    raise RuntimeError('%s:%d: failed to parse' %
                                       self.where(c_tkn))
            else:
                # no rule and no nested node: give the tokens back to the
                # caller and end this block
                log.debug('pushing back tokens %s' %
                          ','.join([x.str for x in tokens]))
                self.pushback_tokens(tokens[0:])
                self.pop_context()
                return node
            tokens = self.pull_tokens(node_tkn.level)
        else:
            log.debug('%s => %s (%s)' %
                      (tknstr, rule.re.pattern, rule.callback))
            # the match covers one id per token, separated by spaces/commas
            n = match.count(' ') + match.count(',') + 1
            args = tokens[0:n]
            tokens = tokens[n:]
            log.debug('matched tokens %d => %s' %
                      (n, ' '.join(x.str for x in tokens)))
            c = getattr(node, rule.callback, None)
            if c is None:
                raise RuntimeError('%s has no method %s' %
                                   (str(nodedef.type), rule.callback))
            c(*args)
    self.pop_context()
    return node

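# End-to-end sketch of rule dispatch (names and ids hypothetical): if a
# line's tokens translate to the id string '3 5' and that matches a rule
# compiled from '_name_ _value_' with callback 'set_attr', then n is
# computed as 2 from the match text ('3 5' has one space and no commas),
# the first two tokens become the arguments, and node.set_attr(tkn_name,
# tkn_value) is invoked.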