Example #1
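These excerpts appear to come from a single lexer/parser class: they reference a module-level log (presumably a logging.getLogger(...) instance) and Python's re module, which the surrounding module would import. This first method compiles a rule's human-readable pattern, literal text plus _type_ placeholders, into a regular expression over numeric token IDs.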
    def compile(self, rule):
        log.debug('compiling rule %s => %s' % (rule.pattern, rule.callback))
        pattern = ''
        i = 0
        while i < len(rule.pattern):
            c = rule.pattern[i]
            if c != '_':
                # Literal characters pass straight through into the regexp.
                pattern += c
                i += 1
                continue

            # Scan for the '_' closing a '_type_' placeholder; hitting a
            # space (or the end of the pattern) first means it was never
            # closed.
            beg = i
            end = i + 1
            for j in range(beg + 1, len(rule.pattern)):
                if rule.pattern[j] in ['_', ' ']:
                    end = j
                    break
            if end >= len(rule.pattern) or rule.pattern[end] != '_':
                raise RuntimeError('unterminated token in %s' % rule.pattern)

            # Replace the placeholder with its numeric token ID wrapped in
            # a regexp group, e.g. '_number_' => '(3)'.
            type_name = rule.pattern[beg:end + 1]
            token_id = self.lookup_id(type_name)
            if token_id is None:
                raise RuntimeError('unknown token type %s' % type_name)
            pattern += '(' + str(token_id) + ')'
            i = end + 1

        log.debug('%s => %s' % (rule.pattern, pattern))
        rule.re = re.compile(pattern)
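For illustration, a self-contained sketch of the same placeholder substitution; compile_pattern and the token IDs below are hypothetical, not part of the original code:

    import re

    def compile_pattern(pattern, lookup_id):
        # Hypothetical standalone version of the loop above.
        out, i = '', 0
        while i < len(pattern):
            if pattern[i] != '_':
                out += pattern[i]
                i += 1
                continue
            end = pattern.find('_', i + 1)
            if end == -1 or ' ' in pattern[i + 1:end]:
                raise RuntimeError('unterminated token in %s' % pattern)
            token_id = lookup_id(pattern[i:end + 1])
            if token_id is None:
                raise RuntimeError('unknown token type %s' % pattern[i:end + 1])
            out += '(%d)' % token_id
            i = end + 1
        return re.compile(out)

    ids = {'_number_': 3, '_comma_': 7}
    print(compile_pattern('_number_ _comma_ _number_', ids.get).pattern)
    # => (3) (7) (3)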
Example #2
    def tokenize(self):
        while self.files:
            # Always read from the file on top of the include stack.
            stream = self.files[-1]
            for tkn in stream:
                if tkn.str == '@modules':
                    self.load_modules(stream)
                    break
                elif tkn.str == '@include':
                    # The next token names the file to include; push it onto
                    # the stack and restart so its tokens are read first.
                    path = next(stream).str
                    self.include_file(path, tkn.level, stream.path)
                    break
                else:
                    log.debug('+ token %s@%d' % (tkn.str, tkn.level))
                    self.tokenq.append(tkn)
            else:
                # for/else: the stream ran out without a break, so this
                # file is finished; pop it and resume the including file.
                self.files.pop()
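The for/else here is load-bearing: the else branch runs only when the token stream is exhausted without a break, i.e. when no directive switched files. A toy demonstration of the pattern with made-up data:

    # Each "file" is just an iterator of token strings here.
    files = [iter(['a', '@include', 'b'])]
    while files:
        stream = files[-1]
        for item in stream:
            if item == '@include':
                files.append(iter(['x', 'y']))  # pretend we opened the file
                break
            print('token', item)
        else:
            files.pop()  # exhausted without a break: done with this file
    # prints tokens a, x, y, b -- the outer file resumes after the include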
Example #3
    def enumerate_tokens(self):
        next_id = 0
        what = 'keyword'
        # Assign numeric IDs, keywords first, then plain tokens; a type
        # that already has an ID keeps it, so shared types stay consistent.
        for tknsets in [self.keywords.values(), self.tokens.values()]:
            for tknset in tknsets:
                for tkn in tknset:
                    existing = self.lookup_id(tkn.type)
                    if existing is None:
                        tkn.id = next_id
                        self.idtbl[tkn.type] = tkn.id    # type name -> ID
                        self.typetbl[tkn.id] = tkn.type  # ID -> type name
                        next_id += 1
                        log.debug('%s %s => #%d' % (what, tkn.type, tkn.id))
                    else:
                        tkn.id = existing
            what = 'token'
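A minimal sketch of the two tables this builds; the type names and the resulting IDs are made up:

    idtbl, typetbl, next_id = {}, {}, 0
    for type_name in ['_number_', '_comma_', '_number_']:
        if type_name not in idtbl:        # mirrors lookup_id() returning None
            idtbl[type_name] = next_id
            typetbl[next_id] = type_name
            next_id += 1
    print(idtbl)    # {'_number_': 0, '_comma_': 1}
    print(typetbl)  # {0: '_number_', 1: '_comma_'}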
Example #4
    def classify_keywords(self, tokens):
        for tkn in tokens:
            if tkn.type is not None:
                continue
            # Try the current (innermost) keyword context first...
            for kw in self.active_keywords[-1]:
                if tkn.str == kw.match:
                    tkn.type = kw.type
                    break
            if tkn.type is not None:
                continue
            # ...then fall back to the global context at the bottom of
            # the stack.
            for kw in self.active_keywords[0]:
                if tkn.str == kw.match:
                    tkn.type = kw.type
                    break
            if tkn.type is not None:
                log.debug('token %s: keyword %s' % (tkn.str, tkn.type))
Example #5
    def classify_tokens(self, tokens):
        for tkn in tokens:
            if tkn.type is not None:
                continue

            # Consult the current context first; add the global context
            # only when it is not already the one on top of the stack.
            contexts = [self.active_tokens[-1], self.active_keywords[-1]]
            if len(self.active_tokens) > 1:
                contexts.append(self.active_tokens[0])
                contexts.append(self.active_keywords[0])

            for ctx in contexts:
                for tkndef in ctx:
                    tkndef.classify(tkn)
            # Anything still unclassified is a generic token; commas and
            # dashes fold to the literal spellings used in rule patterns.
            if tkn.type is None:
                tkn.type = '_token_'
            if tkn.type == '_comma_':
                tkn.type = ','
            if tkn.type == '_dash_':
                tkn.type = '-'

            log.debug('token %s: token %s' % (tkn.str, tkn.type))
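Both classify_keywords() and classify_tokens() consult the innermost context before falling back to the global one at the bottom of the stack. A toy illustration of that lookup order; the keyword pairs are hypothetical:

    active_keywords = [
        [('if', '_kw_if_')],      # index 0: global (outermost) context
        [('node', '_kw_node_')],  # index -1: current (innermost) context
    ]

    def classify(word):
        for ctx in (active_keywords[-1], active_keywords[0]):
            for match, type_name in ctx:
                if word == match:
                    return type_name
        return None

    print(classify('node'))  # '_kw_node_' from the current context
    print(classify('if'))    # '_kw_if_' via fallback to the global context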
Example #6
        def classify(self, tkn):
            if tkn.type is not None:
                return True  # already classified by an earlier matcher
            if isinstance(self.match, str):
                # Exact-string matcher.
                log.debug('testing token %s with %s' % (tkn.str, self.match))
                if tkn.str == self.match:
                    tkn.type = self.type
            elif type(self.match) == Lexer.regexp_type:
                # Regexp matcher: the whole token string must match, not
                # just a prefix.
                log.debug('testing token %s with %s' %
                          (tkn.str, self.match.pattern))
                m = self.match.match(tkn.str)
                if m is not None and m.group(0) == tkn.str:
                    tkn.type = self.type
            elif callable(self.match):
                # Custom matcher: a callable that sets tkn.type itself.
                self.match(tkn)

            return tkn.type is not None
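classify() dispatches on three matcher kinds: an exact string, a compiled regexp (Lexer.regexp_type is presumably type(re.compile(''))), and a callable that classifies the token itself. A self-contained sketch of the same dispatch using re.Pattern (Python 3.8+); the Token class and all names are hypothetical:

    import re

    class Token:
        def __init__(self, s):
            self.str, self.type = s, None

    def classify(tkn, match, type_name):
        if isinstance(match, str):              # exact-string matcher
            if tkn.str == match:
                tkn.type = type_name
        elif isinstance(match, re.Pattern):     # regexp matcher
            m = match.match(tkn.str)
            if m is not None and m.group(0) == tkn.str:  # full match only
                tkn.type = type_name
        elif callable(match):                   # custom callable matcher
            match(tkn)
        return tkn.type is not None

    t = Token('123')
    print(classify(t, re.compile(r'[0-9]+'), '_number_'), t.type)
    # => True _number_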
Example #7
    def match_rule(self, rules, tknstr):
        # Try every rule and keep the longest match, so the most specific
        # rule wins when several patterns match a prefix of the input.
        longest = 0
        rule = None
        match = None
        for r in rules:
            log.debug('matching "%s" against "%s"' % (tknstr, r.re.pattern))
            m = r.re.match(tknstr)
            if m is not None:
                log.debug(' => match (%s)' % m.group(0))
                length = len(m.group(0))
                if length > longest:
                    longest = length
                    rule = r
                    match = m
            else:
                log.debug(' => mismatch')

        return rule, (match.group(0) if match else None)
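A short demonstration of the longest-match policy, reusing the compiled-ID notation from example #1 (the rules and IDs are made up):

    import re

    rules = [re.compile(r'\(3\)'), re.compile(r'\(3\) \(7\) \(3\)')]
    tknstr = '(3) (7) (3)'

    best = None
    for r in rules:
        m = r.match(tknstr)
        if m and (best is None or len(m.group(0)) > len(best.group(0))):
            best = m
    print(best.group(0))  # '(3) (7) (3)': the longer, more specific match wins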
Example #8
    def pop_context(self):
        # Drop the innermost context; both stacks always move together.
        log.debug('pop_context')
        self.active_keywords.pop()
        self.active_tokens.pop()
Example #9
    def push_context(self, name):
        # Activate the keyword/token sets defined for this node type;
        # unknown names push empty contexts so pops stay balanced.
        log.debug('push_context %s' % name)
        self.active_keywords.append(self.keywords.get(name, []))
        self.active_tokens.append(self.tokens.get(name, []))
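Pushes and pops must stay balanced; parse_node() below pushes on entry and pops on every exit path. A toy sketch of the stack discipline with hypothetical node names:

    active = ['<global>']

    def push_context(name):
        active.append(name)

    def pop_context():
        active.pop()

    push_context('interface')  # entering an 'interface' node
    push_context('address')    # a nested node gets its own context
    print(active)              # ['<global>', 'interface', 'address']
    pop_context()
    pop_context()
    print(active)              # ['<global>']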
Example #10
    def parse_node(self, node_tkn, parent):
        node_name = node_tkn.str
        log.debug('parsing node %s...' % node_name)

        # Unknown node types get one chance to be loaded on demand.
        if node_name not in self.nodes:
            if not self.demand_load(node_name):
                raise RuntimeError('%s:%d: unknown node type %s' %
                                   (self.where(node_tkn) + (node_name, )))

        self.push_context(node_name)

        nodedef = self.nodes[node_name]
        extra = self.pull_tokens(node_tkn.level, nodedef.extra_tokens)
        root = self.root
        node = nodedef.type(nodedef, root, parent, node_tkn, *extra)
        tokens = self.pull_tokens(node_tkn.level)

        log.debug('%s block: %s' % (node_name, ' '.join(x.str
                                                        for x in tokens)))

        while tokens:
            # Translate token types to numeric IDs and match the resulting
            # string against this node type's rules.
            xlated = self.translate_tokens(tokens)
            tknstr = ' '.join(x.type for x in tokens)
            xltstr = re.sub(r' , ', ', ', ' '.join(str(x) for x in xlated))

            log.debug('%s xlated to %s' % (tknstr, xltstr))

            rule, match = self.match_rule(self.rules[node_name], xltstr)

            if rule is None:
                # No rule matched: either the first token opens a nested
                # node, or the whole line belongs to an enclosing node.
                if tokens[0].str in self.nodes or \
                   self.demand_load(tokens[0].str):
                    self.pushback_tokens(tokens[1:])
                    c_tkn = tokens[0]
                    c = self.parse_node(c_tkn, node)

                    if c is None:
                        raise RuntimeError('%s:%d: failed to parse' %
                                           self.where(c_tkn))
                else:
                    log.debug('pushing back tokens %s' %
                              ','.join([x.str for x in tokens]))
                    self.pushback_tokens(tokens)
                    self.pop_context()
                    return node
                tokens = self.pull_tokens(node_tkn.level)
            else:
                log.debug('%s => %s (%s)' %
                          (tknstr, rule.re.pattern, rule.callback))
                # Count how many tokens the match consumed: tokens are
                # space-separated, except commas, which were folded into
                # ', ' above.
                n = match.count(' ') + match.count(',') + 1
                args = tokens[0:n]
                tokens = tokens[n:]
                log.debug('matched tokens %d => %s' %
                          (n, ' '.join(x.str for x in tokens)))

                # Dispatch the matched tokens to the rule's callback
                # method on the node object.
                c = getattr(node, rule.callback, None)
                if c is None:
                    raise RuntimeError('%s has no method %s' %
                                       (str(nodedef.type), rule.callback))
                c(*args)

        self.pop_context()

        return node
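The tail of parse_node() dispatches each matched rule to a method named by rule.callback on the node object. A toy sketch of that dispatch (Node, set_mtu, and the tokens are hypothetical):

    class Node:
        def set_mtu(self, kw_tkn, value_tkn):
            print('mtu =', value_tkn)

    node, callback, args = Node(), 'set_mtu', ['mtu', '1500']
    method = getattr(node, callback, None)  # None instead of AttributeError
    if method is None:
        raise RuntimeError('%s has no method %s'
                           % (type(node).__name__, callback))
    method(*args)  # => mtu = 1500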