コード例 #1
0
ファイル: VariableFormatter.py プロジェクト: bihai/poodle-lex
    def __init__(self, plugin_options, reserved_ids):
        """Build the identifier cache and expose its accessors on this object.

        plugin_options -- stored unchanged on the instance.
        reserved_ids -- passed to CachedFormatter as ``reserved`` and also
            stored on the instance.
        """
        def format_section(section_id):
            # A missing section id formats to the empty string.
            if section_id is None:
                return ''
            # Upper-case every dot-separated component, strip the colons and
            # join everything after the first component with underscores.
            parts = [part.upper().replace(':', '')
                     for part in section_id.split('.')]
            return '_'.join(parts[1:])

        def format_token(token_id):
            if token_id is None:
                return None
            return token_id.upper()

        self.plugin_options = plugin_options
        self.cache = CachedFormatter(limit=64, reserved=reserved_ids)
        # Both formatters are registered under the same cache_name.
        for cache_id, formatter in (('section_id', format_section),
                                    ('token_id', format_token)):
            self.cache.add_cache(cache_id, formatter,
                                 cache_name='section_and_tokens')

        # Explicit entries, added in the same order as before.
        for token_id, text in ((None, 'ANONYMOUS'),
                               ('endofstream', 'ENDOFSTREAM'),
                               ('invalidcharacter', 'INVALIDCHARACTER')):
            self.cache.add_token_id(token_id, text)
        for section_id, text in ((None, ''), ('::main::', 'MAIN')):
            self.cache.add_section_id(section_id, text)
        self.reserved_ids = reserved_ids

        # Re-export every get_*/add_*/clear_* attribute of the cache so it
        # can be used directly on this object.
        for attr in dir(self.cache):
            if attr.startswith(('get_', 'add_', 'clear_')):
                setattr(self, attr, getattr(self.cache, attr))
コード例 #2
0
ファイル: Dot.py プロジェクト: bihai/poodle-lex
 def __init__(self, rules_file, dependencies, plugin_options):
     """Store the plugin inputs and create the id/section formatter caches.

     The output file name comes from ``plugin_options.file_name`` and falls
     back to "LexicalAnalyzer" when that is None.
     """
     def format_id(name, prefix=''):
         # A missing id is labelled "anonymous".
         if name is None:
             return prefix + 'anonymous'
         # Lower-case each dot-separated component and strip the colons.
         parts = [part.lower().replace(':', '') for part in name.split('.')]
         tail = parts[1:]
         if tail:
             return prefix + '_'.join(tail)
         # A single-component id is labelled "main".
         return prefix + 'main'

     def format_section(name):
         return format_id(name, 'cluster_')

     self.rules_file = rules_file
     self.dependencies = dependencies
     self.plugin_options = plugin_options
     configured_name = self.plugin_options.file_name
     if configured_name is None:
         configured_name = "LexicalAnalyzer"
     self.file_name = configured_name
     self.cache = CachedFormatter(limit=64, reserved=self.reserved_ids)
     for cache_id, cache_formatter in (('id', format_id),
                                       ('section', format_section)):
         self.cache.add_cache(cache_id, cache_formatter, 'ids')
     self.state_map = {}
     self.exit_vertices = {}
     self.entry_vertex = None
コード例 #3
0
 def __init__(self, rules_file, dependencies, plugin_options):
     """Store the plugin inputs and register the id, rule and state caches.

     The output file name comes from ``plugin_options.file_name`` and falls
     back to "LexicalAnalyzer" when that is None.
     """
     def name_or_anonymous(name):
         return name if name else 'Anonymous'

     def rule_text(id_and_rule):
         # Only the second element of the pair is used as the text.
         return id_and_rule[1]

     def state_text(state_and_name):
         return state_and_name[1]

     self.rules_file = rules_file
     self.dependencies = dependencies
     self.plugin_options = plugin_options
     configured_name = self.plugin_options.file_name
     if configured_name is None:
         configured_name = "LexicalAnalyzer"
     self.file_name = configured_name
     self.cache = CachedFormatter(limit=64, reserved=[])
     for cache_id, cache_formatter in (('id', name_or_anonymous),
                                       ('rule', rule_text),
                                       ('state', state_text)):
         self.cache.add_cache(cache_id, cache_formatter, 'ids')
コード例 #4
0
 def __init__(self, rules, reserved_ids):
     """Create the state-id cache and expose its accessors on this object.

     rules -- iterable of rules; each rule's ``id`` and ``name`` are read
         when a state id is formatted.
     reserved_ids -- passed to CachedFormatter as ``reserved``.
     """
     def format_state(state):
         # Concatenate, in rule order, the name of every rule whose id is
         # listed in the state; unnamed rules contribute 'Anonymous'.
         return ''.join(
             'Anonymous' if rule.name is None else rule.name
             for rule in self.rules
             if rule.id in state.ids)

     self.cache = CachedFormatter(limit=64, reserved=reserved_ids)
     self.cache.add_cache('state_id', format_state)
     self.rules = rules
     # Re-export every get_*/add_*/clear_* attribute of the cache.
     for attr in dir(self.cache):
         if attr.startswith(('get_', 'add_', 'clear_')):
             setattr(self, attr, getattr(self.cache, attr))
コード例 #5
0
ファイル: VariableFormatter.py プロジェクト: bihai/poodle-lex
class StateIdFormatter(object):
    """Formats state ids from the names of the rules a state belongs to."""

    def __init__(self, rules, reserved_ids):
        """Create the state-id cache and expose its accessors on this object.

        rules -- iterable of rules; each rule's ``id`` and ``name`` are read
            when a state id is formatted.
        reserved_ids -- passed to CachedFormatter as ``reserved``.
        """
        def format_state(state):
            # Concatenate, in rule order, the name of every rule whose id is
            # listed in the state; unnamed rules contribute 'Anonymous'.
            matching_names = (
                'Anonymous' if rule.name is None else rule.name
                for rule in self.rules
                if rule.id in state.ids)
            return ''.join(matching_names)

        self.cache = CachedFormatter(limit=64, reserved=reserved_ids)
        self.cache.add_cache('state_id', format_state)
        self.rules = rules
        # Re-export every get_*/add_*/clear_* attribute of the cache.
        for attr in dir(self.cache):
            if attr.startswith(('get_', 'add_', 'clear_')):
                setattr(self, attr, getattr(self.cache, attr))
コード例 #6
0
ファイル: Dot.py プロジェクト: parkertomatoes/poodle-lex
    def __init__(self, rules_file, dependencies, plugin_options):
        """Store the plugin inputs and create the id/section formatter caches.

        The output file name comes from ``plugin_options.file_name`` and
        falls back to "LexicalAnalyzer" when that is None.
        """
        def format_id(name, prefix=""):
            # A missing id is labelled "anonymous".
            if name is None:
                return prefix + "anonymous"
            # Lower-case each dot-separated component and strip the colons.
            parts = [part.lower().replace(":", "") for part in name.split(".")]
            if len(parts) <= 1:
                # A single-component id is labelled "main".
                return prefix + "main"
            return prefix + "_".join(parts[1:])

        def format_section(name):
            return format_id(name, "cluster_")

        self.rules_file = rules_file
        self.dependencies = dependencies
        self.plugin_options = plugin_options
        configured_name = self.plugin_options.file_name
        self.file_name = (
            "LexicalAnalyzer" if configured_name is None else configured_name)
        self.cache = CachedFormatter(limit=64, reserved=self.reserved_ids)
        self.cache.add_cache("id", format_id, "ids")
        self.cache.add_cache("section", format_section, "ids")
        # Vertex bookkeeping starts out empty.
        self.state_map = {}
        self.exit_vertices = {}
        self.entry_vertex = None
コード例 #7
0
ファイル: VariableFormatter.py プロジェクト: bihai/poodle-lex
    def __init__(self, rules, reserved_ids):
        """Create the state-id cache and expose its accessors on this object.

        rules -- iterable of rules; each rule's ``id`` and ``name`` are read
            when a state id is formatted.
        reserved_ids -- passed to CachedFormatter as ``reserved``.
        """
        def format_state(state):
            # One suffix per rule whose id is listed in the state, in rule
            # order. Each suffix starts with '_', so the result contains
            # 'STATE__' whenever at least one rule matches.
            suffixes = [
                '_ANONYMOUS' if rule.name is None else '_' + rule.name.upper()
                for rule in self.rules
                if rule.id in state.ids]
            return 'STATE_' + ''.join(suffixes)

        self.cache = CachedFormatter(limit=512, reserved=reserved_ids)
        self.cache.add_cache('state_id', format_state)
        self.rules = rules
        # Re-export every get_*/add_*/clear_* attribute of the cache.
        for attr in dir(self.cache):
            if attr.startswith(('get_', 'add_', 'clear_')):
                setattr(self, attr, getattr(self.cache, attr))
コード例 #8
0
ファイル: VariableFormatter.py プロジェクト: bihai/poodle-lex
class StateIdFormatter(object):
    """Formats upper-case ``STATE_...`` ids from the rules a state belongs to."""

    def __init__(self, rules, reserved_ids):
        """Create the state-id cache and expose its accessors on this object.

        rules -- iterable of rules; each rule's ``id`` and ``name`` are read
            when a state id is formatted.
        reserved_ids -- passed to CachedFormatter as ``reserved``.
        """
        def format_state(state):
            # One '_'-prefixed suffix per rule whose id is listed in the
            # state, in rule order; unnamed rules contribute '_ANONYMOUS'.
            pieces = ['STATE_']
            for rule in self.rules:
                if rule.id not in state.ids:
                    continue
                if rule.name is None:
                    pieces.append('_ANONYMOUS')
                else:
                    pieces.append('_' + rule.name.upper())
            return ''.join(pieces)

        self.cache = CachedFormatter(limit=512, reserved=reserved_ids)
        self.cache.add_cache('state_id', format_state)
        self.rules = rules
        # Re-export every get_*/add_*/clear_* attribute of the cache.
        for attr in dir(self.cache):
            if attr.startswith(('get_', 'add_', 'clear_')):
                setattr(self, attr, getattr(self.cache, attr))
コード例 #9
0
 def __init__(self, plugin_options, reserved_ids, poodle_namespace):
     """Build the identifier cache and expose its accessors on this object.

     plugin_options, poodle_namespace -- stored unchanged on the instance.
     reserved_ids -- passed to CachedFormatter as ``reserved`` and also
         stored on the instance.
     """
     def format_section(section_id):
         # A missing section id formats to the empty string.
         if section_id is None:
             return ''
         # Strip the colons from every dot-separated component and
         # concatenate everything after the first component.
         parts = [part.replace(':', '') for part in section_id.split('.')]
         return ''.join(parts[1:])

     def format_token(token_id):
         # Token ids are used verbatim.
         return token_id

     self.poodle_namespace = poodle_namespace
     self.plugin_options = plugin_options
     self.reserved_ids = reserved_ids
     self.cache = CachedFormatter(limit=64, reserved=reserved_ids)
     # Both formatters are registered under the same cache_name.
     for cache_id, formatter in (('section_id', format_section),
                                 ('token_id', format_token)):
         self.cache.add_cache(cache_id, formatter,
                              cache_name='section_and_tokens')
     for section_id, text in ((None, ''), ('::main::', 'Main')):
         self.cache.add_section_id(section_id, text)
     # Re-export every get_*/add_*/clear_* attribute of the cache.
     for attr in dir(self.cache):
         if attr.startswith(('get_', 'add_', 'clear_')):
             setattr(self, attr, getattr(self.cache, attr))
コード例 #10
0
ファイル: Xml.py プロジェクト: bihai/poodle-lex
 def __init__(self, rules_file, dependencies, plugin_options):
     """Store the plugin inputs and register the id, rule and state caches.

     The output file name comes from ``plugin_options.file_name`` and falls
     back to "LexicalAnalyzer" when that is None.
     """
     def name_or_anonymous(name):
         if name:
             return name
         return 'Anonymous'

     def rule_text(id_and_rule):
         # Only the second element of the pair is used as the text.
         return id_and_rule[1]

     def state_text(state_and_name):
         return state_and_name[1]

     self.rules_file = rules_file
     self.dependencies = dependencies
     self.plugin_options = plugin_options
     configured_name = self.plugin_options.file_name
     self.file_name = (
         "LexicalAnalyzer" if configured_name is None else configured_name)
     self.cache = CachedFormatter(limit=64, reserved=[])
     self.cache.add_cache('id', name_or_anonymous, 'ids')
     self.cache.add_cache('rule', rule_text, 'ids')
     self.cache.add_cache('state', state_text, 'ids')
コード例 #11
0
ファイル: VariableFormatter.py プロジェクト: bihai/poodle-lex
 def __init__(self, plugin_options, reserved_ids):
     """Build the identifier cache and expose its accessors on this object.

     plugin_options -- stored unchanged on the instance.
     reserved_ids -- passed to CachedFormatter as ``reserved`` and also
         stored on the instance.
     """
     def format_section(section_id):
         # A missing section id formats to the empty string.
         if section_id is None:
             return ''
         # Upper-case every dot-separated component, strip the colons and
         # join everything after the first component with underscores.
         components = section_id.split('.')
         cleaned = [component.upper().replace(':', '')
                    for component in components]
         return '_'.join(cleaned[1:])

     def format_token(token_id):
         return None if token_id is None else token_id.upper()

     self.plugin_options = plugin_options
     self.cache = CachedFormatter(limit=64, reserved=reserved_ids)
     # Both formatters are registered under the same cache_name.
     shared_cache = 'section_and_tokens'
     self.cache.add_cache('section_id', format_section,
                          cache_name=shared_cache)
     self.cache.add_cache('token_id', format_token,
                          cache_name=shared_cache)
     # Explicit entries, added in the same order as before.
     for token_id, text in ((None, 'ANONYMOUS'),
                            ('endofstream', 'ENDOFSTREAM'),
                            ('invalidcharacter', 'INVALIDCHARACTER')):
         self.cache.add_token_id(token_id, text)
     for section_id, text in ((None, ''), ('::main::', 'MAIN')):
         self.cache.add_section_id(section_id, text)
     self.reserved_ids = reserved_ids
     # Re-export every get_*/add_*/clear_* attribute of the cache.
     for attr in dir(self.cache):
         if attr.startswith(('get_', 'add_', 'clear_')):
             setattr(self, attr, getattr(self.cache, attr))
コード例 #12
0
ファイル: Dot.py プロジェクト: bihai/poodle-lex
class DotPlugin(PluginTemplate):
    """Plugin that writes the lexer's state machines as a Graphviz DOT file.

    Every section of the rules file is emitted as one ``subgraph`` and every
    state of its state machine as a numbered vertex inside it.
    """
    # Passed to CachedFormatter as the reserved identifiers.
    reserved_ids = ['strict', 'graph', 'digraph', 'node', 'edge', 'subgraph']

    def __init__(self, rules_file, dependencies, plugin_options):
        """Store the plugin inputs and create the id/section formatter caches.

        The output file name comes from ``plugin_options.file_name`` and
        falls back to "LexicalAnalyzer" when that is None.
        """
        def formatter(name, prefix=''):
            # A missing id is labelled "anonymous".
            if name is None:
                return prefix + 'anonymous'
            # Lower-case each dot-separated component and strip the colons.
            path = [part.lower().replace(':', '') for part in name.split('.')]
            if len(path) > 1:
                return prefix + '_'.join(path[1:])
            # A single-component id is labelled "main".
            return prefix + 'main'

        def section_formatter(name):
            return formatter(name, 'cluster_')

        self.rules_file = rules_file
        self.dependencies = dependencies
        self.plugin_options = plugin_options
        self.file_name = self.plugin_options.file_name
        if self.file_name is None:
            self.file_name = "LexicalAnalyzer"
        self.cache = CachedFormatter(limit=64, reserved=self.reserved_ids)
        self.cache.add_cache('id', formatter, 'ids')
        self.cache.add_cache('section', section_formatter, 'ids')
        # state (or, for vertices without a state, the vertex number itself)
        # -> vertex number
        self.state_map = {}
        # formatted section id -> vertex number of that section's exit vertex
        self.exit_vertices = {}
        # vertex number of the "start" vertex, set while processing
        # 'cluster_main'
        self.entry_vertex = None

    # PluginTemplate interface
    def process(self, token):
        """Generate the graph body when a "CONTENT" token is received."""
        if token.token == "CONTENT":
            self.generate_content(token)

    def get_output_directories(self):
        """Return an empty list."""
        return []

    def get_files_to_copy(self):
        """Return an empty list."""
        return []

    def get_files_to_generate(self):
        """Return the single (template name, output file name) pair."""
        return [("LexicalAnalyzer.dot", self.file_name + ".dot")]

    # Private methods
    def generate_content(self, token):
        """Emit all vertices, then all edges, for every section.

        token -- the token being processed; output goes to ``token.stream``.
        """
        code = CodeEmitter(token.stream)
        code.indent()

        # Generate the state machine factory: the section's DFA by default,
        # or - in NFA form - the alternation of the NFAs of all its rules.
        self.get_state_machine = lambda section: section.dfa
        if self.plugin_options.form == self.plugin_options.NFA_IR:
            nfas = {}
            for section in self.rules_file.sections.values():
                nfas[section] = section.rules[0].nfa.alternate(rule.nfa for rule in section.rules)
            self.get_state_machine = lambda section: nfas[section]

        # Generate vertices
        for id, section in self.rules_file.sections.items():
            formatted_id = self.cache.get_section(id)
            with code.block("subgraph {0} {{".format(formatted_id), "}"):
                code.line('label="{0}"'.format(self.get_section_label(section, id)))
                self.generate_vertices(code, formatted_id, section)
        for vertex in self.exit_vertices.values():
            self.draw_vertex(code, id=vertex, label="exit section", is_shapeless=True)

        code.line()

        # Generate edges
        for id, section in self.rules_file.sections.items():
            formatted_id = self.cache.get_section(id)
            self.generate_edges(code, formatted_id, section)

    @staticmethod
    def get_section_label(section, id):
        """Return the label for a section: its id plus any attributes.

        A section that both inherits and exits is labelled with both
        attributes; previously an ``elif`` dropped 'exits' in that case.
        """
        attributes = []
        if section.inherits:
            attributes.append('inherits')
        if section.exits:
            attributes.append('exits')
        if attributes:
            return "{0} ({1})".format(id, ','.join(attributes))
        return id

    @staticmethod
    def get_rule_ids(rules, ids):
        """Yield the display name of every rule whose id is in ``ids``.

        Rules without a name are reported as 'Anonymous'.
        """
        for rule in rules:
            if rule.id in ids:
                yield rule.name if rule.name is not None else 'Anonymous'

    @staticmethod
    def get_matching_rule(rules, ids):
        """Return the first rule whose id is in ``ids``, or None."""
        return next((rule for rule in rules if rule.id in ids), None)

    def allocate_vertex(self, state=None):
        """Return the vertex number for ``state``, allocating one if needed.

        Called without a state, a fresh vertex is always allocated and is
        keyed in ``state_map`` by its own number.
        """
        key = state
        value = self.state_map.get(key)
        if value is None:
            value = len(self.state_map)
            # Identity test: avoids invoking a state's custom __eq__.
            if key is None:
                key = value
            self.state_map[key] = value
        return value

    @staticmethod
    def draw_vertex(code, id, label=None, ids=None, final_ids=None, is_final=False, is_shapeless=False):
        """Emit the DOT statement for one vertex.

        The label defaults to the vertex id. When none of the branches
        below applies (not shapeless, not final and no ids) nothing is
        emitted.
        """
        text_label = label
        if text_label is None:
            text_label = id
        if is_shapeless:
            code.line('{i} [label="{label}", shape=none];'.format(i=id, label=text_label))
        elif ids is not None and not is_final:
            code.line('{i} [label="({ids})"];'.format(i=id, ids=ids))
        elif final_ids is None and is_final:
            code.line('{i} [label="{label}", shape=octagon];'.format(i=id, label=text_label))
        elif final_ids is not None and is_final:
            code.line('{i} [label="({ids})\\n({final_ids})", shape=octagon];'.format(i=id, ids=ids, final_ids=final_ids))

    @staticmethod
    def draw_edge(code, start, end, label=None):
        """Emit the DOT statement for one edge, with an optional label."""
        if label is None:
            code.line('{0} -> {1}'.format(start, end))
        else:
            code.line('{0} -> {1} [label="{2}"]'.format(start, end, label))

    @staticmethod
    def format_codepoint(codepoint):
        """Return the label text for a single codepoint.

        The double quote and backslash are escaped, other codepoints in
        32..126 are shown as a quoted character, everything else as hex.
        """
        if codepoint == ord('"'):
            return "'\\\"'"
        # Chained comparison instead of ``in xrange(32, 127)``: constant
        # time and valid on both Python 2 and Python 3.
        if 32 <= codepoint < 127:
            return "'%s'" % chr(codepoint).replace("\\", "\\\\")
        return "0x%x" % codepoint

    @staticmethod
    def format_case(range):
        """Return the label text for a (first, last) codepoint pair.

        The parameter name shadows the builtin ``range``; it is kept for
        interface compatibility.
        """
        if range[0] == range[1]:
            return "%s" % DotPlugin.format_codepoint(range[0])
        return "%s-%s" % tuple(DotPlugin.format_codepoint(i) for i in range)

    def allocate_vertices(self, section, id, state_machine):
        """Allocate vertex numbers for a section.

        One vertex per state, plus the entry vertex for 'cluster_main' and
        an exit vertex when any rule of the section has an 'exit' section
        action.
        """
        for state in state_machine:
            self.allocate_vertex(state)
        if id == "cluster_main":
            self.entry_vertex = self.allocate_vertex()
        # Test the condition itself rather than the truthiness of the rule
        # objects that satisfy it.
        has_exit = any(
            rule.section_action is not None and rule.section_action[0] == 'exit'
            for rule in section.rules)
        if has_exit:
            self.exit_vertices[id] = self.allocate_vertex()

    def generate_vertices(self, code, id, section):
        """Emit the vertex statements for one section."""
        state_machine = self.get_state_machine(section)
        self.allocate_vertices(section, id, state_machine)

        for state in state_machine:
            i = self.state_map[state]
            ids = None
            if any(state.ids):
                ids = ', '.join(self.get_rule_ids(section.rules, state.ids))
            final_ids = None
            if any(state.final_ids):
                final_ids = ', '.join(self.get_rule_ids(section.rules, state.final_ids))
            self.draw_vertex(code, id=i, ids=ids, final_ids=final_ids, is_final=any(state.final_ids))
        # Declare the start vertex only inside the main cluster, matching
        # the guard in generate_edges. Without the id check it was declared
        # again in every section processed after the main one.
        if self.entry_vertex is not None and id == 'cluster_main':
            self.draw_vertex(code, id=self.entry_vertex, label="start", is_shapeless=True)

    def generate_edges(self, code, id, section):
        """Emit the edge statements for one section.

        Covers the start edge (main cluster only), character transitions,
        epsilon transitions when the state has them, and section-action
        edges ('exit', 'enter', 'switch') leaving final states.
        """
        state_machine = self.get_state_machine(section)

        if self.entry_vertex is not None and id == 'cluster_main':
            self.draw_edge(code, self.entry_vertex, self.state_map[state_machine.start_state])

        for state in state_machine:
            for destination, edge in state.edges.items():
                edge_label = ", ".join([self.format_case(i) for i in edge])
                # Break very long labels onto several lines.
                edge_label = '\\n'.join(textwrap.wrap(edge_label, 256))
                self.draw_edge(code, self.state_map[state], self.state_map[destination], edge_label)

            if hasattr(state, 'epsilon_edges'):
                for destination in state.epsilon_edges:
                    self.draw_edge(code, self.state_map[state], self.state_map[destination], 'ε')

            if any(state.final_ids):
                rule = self.get_matching_rule(section.rules, state.final_ids)
                if rule is not None and rule.section_action is not None and rule.section_action[0] is not None:
                    label = rule.section_action[0]
                    destination = None
                    if label == 'exit':
                        destination = self.exit_vertices[id]
                    elif label in ('enter', 'switch'):
                        # Point at the start state of the target section,
                        # allocating its vertex if not seen yet.
                        destination_section = self.rules_file.sections[rule.section_action[1]]
                        destination = self.get_state_machine(destination_section).start_state
                        if destination not in self.state_map:
                            self.allocate_vertex(destination)
                        destination = self.state_map[destination]
                    if destination is not None:
                        self.draw_edge(code, self.state_map[state], destination, label)
                        
コード例 #13
0
ファイル: Xml.py プロジェクト: bihai/poodle-lex
class XmlPlugin(PluginTemplate):
    """Plugin that writes the lexer's rules and state machines as XML."""

    def __init__(self, rules_file, dependencies, plugin_options):
        """Store the plugin inputs and register the id, rule and state caches.

        The output file name comes from ``plugin_options.file_name`` and
        falls back to "LexicalAnalyzer" when that is None.
        """
        def name_or_anonymous(name):
            return name if name else 'Anonymous'

        def rule_text(id_and_rule):
            # Only the second element of the pair is used as the text.
            return id_and_rule[1]

        def state_text(state_and_name):
            return state_and_name[1]

        self.rules_file = rules_file
        self.dependencies = dependencies
        self.plugin_options = plugin_options
        configured_name = self.plugin_options.file_name
        if configured_name is None:
            configured_name = "LexicalAnalyzer"
        self.file_name = configured_name
        self.cache = CachedFormatter(limit=64, reserved=[])
        self.cache.add_cache('id', name_or_anonymous, 'ids')
        self.cache.add_cache('rule', rule_text, 'ids')
        self.cache.add_cache('state', state_text, 'ids')

    # PluginTemplate interface
    def process(self, token):
        """Generate the document when a "ROOT" token is received."""
        if token.token != "ROOT":
            return
        self.generate_root(token)

    def get_output_directories(self):
        """Return an empty list."""
        return []

    def get_files_to_copy(self):
        """Return an empty list."""
        return []

    def get_files_to_generate(self):
        """Return the single (template name, output file name) pair."""
        output_name = self.file_name + ".xml"
        return [("LexicalAnalyzer.xml", output_name)]

    # Private utility methods
    def get_form(self):
        """Return "NFA" when the NFA form is selected, otherwise "DFA"."""
        if self.plugin_options.form == self.plugin_options.NFA_IR:
            return "NFA"
        return "DFA"

    @staticmethod
    def get_rules(ids, section):
        """Return a generator over the section's rules whose id is in ``ids``."""
        return (candidate for candidate in section.rules if candidate.id in ids)

    @staticmethod
    def get_rule_name(rule):
        """Return the rule's name, or 'Anonymous' when it has none."""
        name = rule.name
        return name if name else 'Anonymous'

    @staticmethod
    def get_rule_names(ids, section):
        """Return a generator over the names of the rules selected by ``ids``."""
        return (XmlPlugin.get_rule_name(selected)
                for selected in XmlPlugin.get_rules(ids, section))

    def format_state_id(self, state, section):
        """Return the cached id of a state.

        The text handed to the cache is the concatenated names of the
        state's rules, or 'State' when there are none.
        """
        text = ''.join(self.get_rule_names(state.ids, section)) or 'State'
        return self.cache.get_state((state, text))

    def format_rule_id(self, rule, section):
        """Return the cached id of a rule, keyed by the rule's id."""
        text = self.get_rule_name(rule) or 'Rule'
        return self.cache.get_rule((rule.id, text))

    # Generation methods
    def generate_root(self, token):
        """Build the whole document and pretty-print it to ``token.stream``."""
        E = self.dependencies["ElementTreeFactory"]._E()
        form_element = E.Form(self.get_form())
        sections_element = E.Sections(*list(self.generate_sections(E)))
        root = E.LexicalAnalyzer(
            form_element,
            sections_element,
            xmlns="https://github.com/parkertomatoes/poodle-lex")
        pretty_print(root, token.stream)

    def generate_sections(self, E):
        """Yield one Section element per section of the rules file."""
        for section_id, section in self.rules_file.sections.items():
            yield self.generate_section(E, section_id, section)

    def generate_section(self, E, id, section):
        """Build the Section element: its rules, then its DFA if present."""
        children = [E.Rules(*list(self.generate_rules(E, section)))]
        # The state machine is built lazily, only when the section has a dfa.
        children = children + maybe(
            hasattr(section, 'dfa'),
            lambda: self.generate_state_machine(E, section, section.dfa))
        # Attributes are computed after the children, as before, so ids are
        # requested from the cache in the same order.
        attributes = {
            'id': self.cache.get_id(id),
            'inherits': str(section.inherits).lower(),
            'exits': str(section.exits).lower(),
        }
        return E.Section(*children, **attributes)

    def generate_rules(self, E, section):
        """Yield one Rule element per rule of the section."""
        for current_rule in section.rules:
            yield self.generate_rule(E, section, current_rule)

    def generate_rule(self, E, section, rule):
        """Build the Rule element with its optional actions, section action and NFA."""
        def build_actions():
            action_elements = [E.Action(action) for action in rule.action]
            return E.Actions(*action_elements)

        def build_section_action():
            action_name = rule.section_action[0]
            target = rule.section_action[1]
            parts = [E.Action(action_name)] + maybe(
                target is not None, lambda: E.Section(target))
            return E.SectionAction(*parts)

        def build_nfa():
            return self.generate_state_machine(E, section, rule.nfa)

        # Each optional child is only built when its condition holds.
        children = maybe(any(rule.action), build_actions)
        children = children + maybe(
            rule.section_action is not None and rule.section_action[0] is not None,
            build_section_action)
        children = children + maybe(hasattr(rule, 'nfa'), build_nfa)

        # Attributes are computed after the children, as before.
        attributes = {
            'name': self.get_rule_name(rule),
            'id': self.format_rule_id(rule, section),
        }
        if hasattr(rule, 'line_number'):
            attributes['line_number'] = str(rule.line_number)
        return E.Rule(*children, **attributes)

    def generate_state_machine(self, E, section, state_machine):
        """Build the StateMachine element with 'start' and optional 'end'."""
        states = list(self.generate_states(E, section, state_machine))
        # Attributes are computed after the states, as before.
        attributes = {
            'start': self.format_state_id(state_machine.start_state, section),
        }
        if hasattr(state_machine, 'end_state'):
            attributes['end'] = self.format_state_id(state_machine.end_state, section)
        return E.StateMachine(*states, **attributes)

    def generate_states(self, E, section, state_machine):
        """Yield one State element per state of the state machine."""
        for current_state in state_machine:
            yield self.generate_state(E, section, current_state)

    def generate_state(self, E, section, state):
        """Build the State element: ids, final ids, transitions, then its id."""
        ids_element = E.Ids(*self.generate_state_ids(E, section, state.ids))
        final_ids_element = E.FinalIds(
            *self.generate_state_ids(E, section, state.final_ids))
        transitions_element = E.Transitions(
            *list(self.generate_transitions(E, section, state)))
        state_id = self.format_state_id(state, section)
        return E.State(
            ids_element, final_ids_element, transitions_element, id=state_id)

    def generate_state_ids(self, E, section, ids):
        """Yield one Id element per rule selected by ``ids``."""
        for selected in self.get_rules(ids, section):
            yield E.Id(self.format_rule_id(selected, section))

    def generate_transitions(self, E, section, state):
        """Return the edge transitions followed by any epsilon transitions."""
        edge_transitions = list(self.generate_edge_transitions(E, section, state))
        epsilon_transitions = maybe_list(
            hasattr(state, 'epsilon_edges'),
            lambda: self.generate_epsilon_transitions(E, section, state))
        return edge_transitions + epsilon_transitions

    def generate_edge_transitions(self, E, section, state):
        """Yield one Transition element per outgoing edge of the state."""
        for destination, edge in state.edges.items():
            cases = list(self.generate_edge(E, section, state, destination, edge))
            yield E.Transition(
                *cases,
                Destination=self.format_state_id(destination, section))

    def generate_edge(self, E, section, state, destination, edge):
        """Yield a Codepoint or Range element per (min, max) pair of the edge."""
        for (low, high) in edge:
            if low != high:
                yield E.Range(start=str(low), end=str(high))
            else:
                yield E.Codepoint(str(low))

    def generate_epsilon_transitions(self, E, section, state):
        """Yield one childless Transition element per epsilon edge."""
        for target in state.epsilon_edges:
            yield E.Transition(Destination=self.format_state_id(target, section))
            
コード例 #14
0
ファイル: VariableFormatter.py プロジェクト: bihai/poodle-lex
class VariableFormatter(object):
    """Formats section, token and type identifiers for generated code."""

    def __init__(self, plugin_options, reserved_ids):
        """Build the identifier cache and expose its accessors on this object.

        plugin_options -- stored unchanged on the instance.
        reserved_ids -- passed to CachedFormatter as ``reserved`` and also
            stored on the instance.
        """
        def format_section(section_id):
            # A missing section id formats to the empty string.
            if section_id is None:
                return ''
            # Upper-case every dot-separated component, strip the colons and
            # join everything after the first component with underscores.
            parts = [part.upper().replace(':', '')
                     for part in section_id.split('.')]
            return '_'.join(parts[1:])

        def format_token(token_id):
            if token_id is None:
                return None
            return token_id.upper()

        self.plugin_options = plugin_options
        self.cache = CachedFormatter(limit=64, reserved=reserved_ids)
        # Both formatters are registered under the same cache_name.
        for cache_id, formatter in (('section_id', format_section),
                                    ('token_id', format_token)):
            self.cache.add_cache(cache_id, formatter,
                                 cache_name='section_and_tokens')
        # Explicit entries, added in the same order as before.
        for token_id, text in ((None, 'ANONYMOUS'),
                               ('endofstream', 'ENDOFSTREAM'),
                               ('invalidcharacter', 'INVALIDCHARACTER')):
            self.cache.add_token_id(token_id, text)
        for section_id, text in ((None, ''), ('::main::', 'MAIN')):
            self.cache.add_section_id(section_id, text)
        self.reserved_ids = reserved_ids
        # Re-export every get_*/add_*/clear_* attribute of the cache.
        for attr in dir(self.cache):
            if attr.startswith(('get_', 'add_', 'clear_')):
                setattr(self, attr, getattr(self.cache, attr))

    def get_class_name(self):
        """Return the class name configured in the plugin options."""
        return self.plugin_options.class_name

    def get_scoped(self, id, is_relative):
        """Return ``id`` as-is when relative, else prefixed with the class name and '::'."""
        if is_relative:
            return id
        owner = self.get_class_name()
        return '{class_name}::{type}'.format(class_name=owner, type=id)

    def get_state_machine_method_name(self, section, is_relative):
        """Return the (optionally scoped) 'get_token...' method name for a section.

        The lower-cased section id is appended after an underscore, unless
        it is empty.
        """
        suffix = self.cache.get_section_id(section).lower()
        if suffix != '':
            suffix = '_' + suffix
        return self.get_scoped('get_token{id}'.format(id=suffix), is_relative)

    def get_type(self, type_name, is_relative):
        """Return the type text for one of the known type names.

        'state' is always returned unscoped; 'mode' and 'token' go through
        get_scoped; 'mode_stack' wraps the mode type in std::stack<>.
        Raises Exception for any other name.
        """
        if type_name == 'mode_stack':
            mode_text = self.get_type('mode', is_relative)
            return "std::stack<{mode_type}>".format(mode_type=mode_text)
        for known_name, type_text in (('mode', 'Mode'), ('token', 'Token')):
            if type_name == known_name:
                return self.get_scoped(type_text, is_relative)
        if type_name == 'state':
            return 'State'
        raise Exception("unrecognized type '{id}'".format(id=type_name))

    def get_state_id_formatter(self, rules):
        """Return a new StateIdFormatter for ``rules`` sharing the reserved ids."""
        return StateIdFormatter(rules, self.reserved_ids)
コード例 #15
0
ファイル: Dot.py プロジェクト: parkertomatoes/poodle-lex
class DotPlugin(PluginTemplate):
    """Plugin that writes the rules file's state machines as Graphviz DOT text.

    Every section is emitted as a ``subgraph`` whose id comes from the
    ``cluster_``-prefixed section formatter; states become numbered vertices
    and transitions become labelled edges.
    """

    # DOT language keywords, handed to CachedFormatter as reserved ids.
    reserved_ids = ["strict", "graph", "digraph", "node", "edge", "subgraph"]

    def __init__(self, rules_file, dependencies, plugin_options):
        """Store the plugin inputs and set up the id caches and vertex maps."""
        def formatter(id, prefix=""):
            # None -> "anonymous"; otherwise drop the first dotted component,
            # lower-case the rest and strip ':' characters.  An id with a
            # single component maps to "main".
            if id is None:
                return prefix + "anonymous"
            path = [part.lower().replace(":", "") for part in id.split(".")]
            if len(path) > 1:
                return prefix + "_".join(path[1:])
            return prefix + "main"

        def section_formatter(id):
            return formatter(id, "cluster_")

        self.rules_file = rules_file
        self.dependencies = dependencies
        self.plugin_options = plugin_options
        self.file_name = self.plugin_options.file_name
        if self.file_name is None:
            self.file_name = "LexicalAnalyzer"
        self.cache = CachedFormatter(limit=64, reserved=self.reserved_ids)
        self.cache.add_cache("id", formatter, "ids")
        self.cache.add_cache("section", section_formatter, "ids")
        # state (or int, for vertices without a state) -> vertex number
        self.state_map = {}
        # formatted section id -> vertex number of that section's exit vertex
        self.exit_vertices = {}
        # vertex number of the "start" vertex; set while processing cluster_main
        self.entry_vertex = None

    # PluginTemplate interface
    def process(self, token):
        """Generate the graph body when the ``CONTENT`` token is reached."""
        if token.token == "CONTENT":
            self.generate_content(token)

    def get_output_directories(self):
        """Return the (empty) list of output directories."""
        return []

    def get_files_to_copy(self):
        """Return the (empty) list of files to copy."""
        return []

    def get_files_to_generate(self):
        """Return the single (template name, output name) pair for the .dot file."""
        return [("LexicalAnalyzer.dot", self.file_name + ".dot")]

    # Private methods
    def generate_content(self, token):
        """Write all vertices (grouped per section) followed by all edges."""
        code = CodeEmitter(token.stream)
        code.indent()

        # Generate the state machine factory
        self.get_state_machine = lambda section: section.dfa
        if self.plugin_options.form == self.plugin_options.NFA_IR:
            # For the NFA form, combine every rule's NFA of a section into one
            # machine via the first rule's ``alternate``.
            nfas = {}
            for section in self.rules_file.sections.values():
                nfas[section] = section.rules[0].nfa.alternate(rule.nfa for rule in section.rules)
            self.get_state_machine = lambda section: nfas[section]

        # Generate vertices
        for id, section in self.rules_file.sections.items():
            formatted_id = self.cache.get_section(id)
            with code.block("subgraph {0} {{".format(formatted_id), "}"):
                code.line('label="{0}"'.format(self.get_section_label(section, id)))
                self.generate_vertices(code, formatted_id, section)
        # Exit vertices are drawn after (outside) the section subgraphs.
        for vertex in self.exit_vertices.values():
            self.draw_vertex(code, id=vertex, label="exit section", is_shapeless=True)

        code.line()

        # Generate edges
        for id, section in self.rules_file.sections.items():
            formatted_id = self.cache.get_section(id)
            self.generate_edges(code, formatted_id, section)

    @staticmethod
    def get_section_label(section, id):
        """Return the section id, suffixed with ``(inherits)`` or ``(exits)`` if set.

        ``inherits`` takes precedence: ``exits`` is only reported when the
        section does not inherit.
        """
        attributes = []
        if section.inherits:
            attributes.append("inherits")
        elif section.exits:
            attributes.append("exits")
        if attributes:
            return "{0} ({1})".format(id, ",".join(attributes))
        return id

    @staticmethod
    def get_rule_ids(rules, ids):
        """Yield the name (or ``"Anonymous"``) of each rule whose id is in ``ids``."""
        for rule in rules:
            if rule.id in ids:
                if rule.name is not None:
                    yield rule.name
                else:
                    yield "Anonymous"

    @staticmethod
    def get_matching_rule(rules, ids):
        """Return the first rule whose id is in ``ids``, or ``None``."""
        return next((rule for rule in rules if rule.id in ids), None)

    def allocate_vertex(self, state=None):
        """Return the vertex number for ``state``, allocating one if needed.

        Called without a state, a fresh vertex is always allocated and stored
        under its own number.
        """
        key = state
        value = self.state_map.get(key)
        if value is None:
            value = len(self.state_map)
            # Identity check: a state object may define its own __eq__.
            if key is None:
                key = value
            self.state_map[key] = value
        return value

    @staticmethod
    def draw_vertex(code, id, label=None, ids=None, final_ids=None, is_final=False, is_shapeless=False):
        """Emit one DOT node statement for vertex ``id``.

        ``label`` defaults to the vertex id.  A non-final vertex without
        ``ids`` (and not shapeless) produces no output.
        """
        text_label = label
        if text_label is None:
            text_label = id
        if is_shapeless:
            code.line('{i} [label="{label}", shape=none];'.format(i=id, label=text_label))
        elif ids is not None and not is_final:
            code.line('{i} [label="({ids})"];'.format(i=id, ids=ids))
        elif final_ids is None and is_final:
            code.line('{i} [label="{label}", shape=octagon];'.format(i=id, label=text_label))
        elif final_ids is not None and is_final:
            code.line(
                '{i} [label="({ids})\\n({final_ids})", shape=octagon];'.format(
                    i=id, ids=ids, final_ids=final_ids
                )
            )

    @staticmethod
    def draw_edge(code, start, end, label=None):
        """Emit one DOT edge statement, with a label attribute when given."""
        if label is None:
            code.line("{0} -> {1}".format(start, end))
        else:
            code.line('{0} -> {1} [label="{2}"]'.format(start, end, label))

    @staticmethod
    def format_codepoint(codepoint):
        """Return a quoted character for codepoints 32..126, else a hex literal.

        The double quote and backslash are escaped so they survive inside a
        double-quoted DOT label.
        """
        if codepoint == ord('"'):
            return "'\\\"'"
        # Plain comparison instead of ``in xrange(...)``: same range, and it
        # does not rely on the Python-2-only ``xrange`` builtin.
        if 32 <= codepoint < 127:
            return "'%s'" % chr(codepoint).replace("\\", "\\\\")
        return "0x%x" % codepoint

    @staticmethod
    def format_case(range):
        """Format a ``(first, last)`` codepoint pair as ``a`` or ``a-b``."""
        if range[0] == range[1]:
            return "%s" % DotPlugin.format_codepoint(range[0])
        return "%s-%s" % tuple([DotPlugin.format_codepoint(i) for i in range])

    def allocate_vertices(self, section, id, state_machine):
        """Allocate vertex numbers for a section's states, entry and exit."""
        for state in state_machine:
            self.allocate_vertex(state)
        if id == "cluster_main":
            self.entry_vertex = self.allocate_vertex()
        # A section gets an exit vertex only if one of its rules exits.
        if any(rule for rule in section.rules if rule.section_action is not None and rule.section_action[0] == "exit"):
            self.exit_vertices[id] = self.allocate_vertex()

    def generate_vertices(self, code, id, section):
        """Emit the node statements for every state of ``section``."""
        state_machine = self.get_state_machine(section)
        self.allocate_vertices(section, id, state_machine)

        for state in state_machine:
            i = self.state_map[state]
            ids = None
            if any(state.ids):
                ids = ", ".join(self.get_rule_ids(section.rules, state.ids))
            final_ids = None
            if any(state.final_ids):
                final_ids = ", ".join(self.get_rule_ids(section.rules, state.final_ids))
            self.draw_vertex(code, id=i, ids=ids, final_ids=final_ids, is_final=any(state.final_ids))
        # entry_vertex stays set once cluster_main has been processed, so
        # restrict the "start" vertex to that cluster (as generate_edges does)
        # instead of re-declaring it inside every later subgraph.
        if self.entry_vertex is not None and id == "cluster_main":
            self.draw_vertex(code, id=self.entry_vertex, label="start", is_shapeless=True)

    def generate_edges(self, code, id, section):
        """Emit the edge statements for every state of ``section``."""
        state_machine = self.get_state_machine(section)

        if self.entry_vertex is not None and id == "cluster_main":
            self.draw_edge(code, self.entry_vertex, self.state_map[state_machine.start_state])

        for state in state_machine:
            for destination, edge in state.edges.items():
                edge_label = ", ".join([self.format_case(i) for i in edge])
                # Wrap long labels; "\\n" is a literal backslash-n in the DOT text.
                edge_label = "\\n".join(textwrap.wrap(edge_label, 256))
                self.draw_edge(code, self.state_map[state], self.state_map[destination], edge_label)

            if hasattr(state, "epsilon_edges"):
                for destination in state.epsilon_edges:
                    self.draw_edge(code, self.state_map[state], self.state_map[destination], "&epsilon;")

            if any(state.final_ids):
                rule = self.get_matching_rule(section.rules, state.final_ids)
                if rule is not None and rule.section_action is not None and rule.section_action[0] is not None:
                    label = rule.section_action[0]
                    destination = None
                    if rule.section_action[0] == "exit":
                        destination = self.exit_vertices[id]
                    elif rule.section_action[0] in ("enter", "switch"):
                        # Point at the start state of the target section.
                        destination_section = self.rules_file.sections[rule.section_action[1]]
                        destination = self.get_state_machine(destination_section).start_state
                        if destination not in self.state_map:
                            self.allocate_vertex(destination)
                        destination = self.state_map[destination]
                    if destination is not None:
                        self.draw_edge(code, self.state_map[state], destination, label)
コード例 #16
0
class XmlPlugin(PluginTemplate):
    """Plugin that writes the rules file's sections, rules and state machines as XML."""

    def __init__(self, rules_file, dependencies, plugin_options):
        """Store the plugin inputs and register the three id caches."""
        def format_id(id):
            return id or 'Anonymous'

        def format_rule(id_and_rule):
            return id_and_rule[1]

        def format_state(state_and_name):
            return state_and_name[1]

        self.rules_file = rules_file
        self.dependencies = dependencies
        self.plugin_options = plugin_options
        configured_name = self.plugin_options.file_name
        self.file_name = "LexicalAnalyzer" if configured_name is None else configured_name
        self.cache = CachedFormatter(limit=64, reserved=[])
        # All three caches share the 'ids' cache name.
        for cache_id, cache_formatter in (('id', format_id),
                                          ('rule', format_rule),
                                          ('state', format_state)):
            self.cache.add_cache(cache_id, cache_formatter, 'ids')

    # PluginTemplate interface
    def process(self, token):
        """Generate the document when the ``ROOT`` token is reached."""
        if token.token != "ROOT":
            return
        self.generate_root(token)

    def get_output_directories(self):
        """Return the (empty) list of output directories."""
        return list()

    def get_files_to_copy(self):
        """Return the (empty) list of files to copy."""
        return list()

    def get_files_to_generate(self):
        """Return the single (template name, output name) pair for the .xml file."""
        output_name = self.file_name + ".xml"
        return [("LexicalAnalyzer.xml", output_name)]

    # Private utility methods
    def get_form(self):
        """Return ``"NFA"`` when the NFA form is selected, otherwise ``"DFA"``."""
        if self.plugin_options.form == self.plugin_options.NFA_IR:
            return "NFA"
        return "DFA"

    @staticmethod
    def get_rules(ids, section):
        """Lazily select the section's rules whose id is in ``ids``."""
        return (candidate for candidate in section.rules if candidate.id in ids)

    @staticmethod
    def get_rule_name(rule):
        """Return the rule's name, or ``'Anonymous'`` when it is falsy."""
        name = rule.name
        return name if name else 'Anonymous'

    @staticmethod
    def get_rule_names(ids, section):
        """Lazily produce the display name of every rule selected by ``ids``."""
        return (XmlPlugin.get_rule_name(selected)
                for selected in XmlPlugin.get_rules(ids, section))

    def format_state_id(self, state, section):
        """Return the cached id for ``state``, named after its rules (or ``'State'``)."""
        joined_names = ''.join(self.get_rule_names(state.ids, section))
        return self.cache.get_state((state, joined_names or 'State'))

    def format_rule_id(self, rule, section):
        """Return the cached id for ``rule``, keyed by its id and display name."""
        display_name = self.get_rule_name(rule) or 'Rule'
        return self.cache.get_rule((rule.id, display_name))

    # Generation methods
    def generate_root(self, token):
        """Build the ``LexicalAnalyzer`` root element and pretty-print it to the token stream."""
        E = self.dependencies["ElementTreeFactory"]._E()
        form_element = E.Form(self.get_form())
        sections_element = E.Sections(*list(self.generate_sections(E)))
        root = E.LexicalAnalyzer(
            form_element,
            sections_element,
            xmlns="https://github.com/parkertomatoes/poodle-lex")
        pretty_print(root, token.stream)

    def generate_sections(self, E):
        """Yield one ``Section`` element per section of the rules file."""
        for section_key, section_value in self.rules_file.sections.items():
            yield self.generate_section(E, section_key, section_value)

    def generate_section(self, E, id, section):
        """Build a ``Section`` element: its rules, optional DFA, and attributes."""
        # Children are built before the attributes so that ids are requested
        # from the shared cache in the same order as before.
        children = [E.Rules(*list(self.generate_rules(E, section)))]
        children = children + maybe(
            hasattr(section, 'dfa'),
            lambda: self.generate_state_machine(E, section, section.dfa))
        attributes = {
            'id': self.cache.get_id(id),
            'inherits': str(section.inherits).lower(),
            'exits': str(section.exits).lower()
        }
        return E.Section(*children, **attributes)

    def generate_rules(self, E, section):
        """Yield one ``Rule`` element per rule of the section."""
        for current_rule in section.rules:
            yield self.generate_rule(E, section, current_rule)

    def generate_rule(self, E, section, rule):
        """Build a ``Rule`` element with optional actions, section action and NFA."""
        def actions_element():
            return E.Actions(*[E.Action(entry) for entry in rule.action])

        def section_action_element():
            parts = [E.Action(rule.section_action[0])]
            parts = parts + maybe(rule.section_action[1] is not None,
                                  lambda: E.Section(rule.section_action[1]))
            return E.SectionAction(*list(parts))

        def nfa_element():
            return self.generate_state_machine(E, section, rule.nfa)

        children = maybe(any(rule.action), actions_element)
        children = children + maybe(
            rule.section_action is not None
            and rule.section_action[0] is not None, section_action_element)
        children = children + maybe(hasattr(rule, 'nfa'), nfa_element)

        # Attributes come after the children; 'line_number' is only present
        # when the rule carries one.
        has_line_number = hasattr(rule, 'line_number')
        attributes = {
            'name': self.get_rule_name(rule),
            'id': self.format_rule_id(rule, section)
        }
        if has_line_number:
            attributes['line_number'] = str(rule.line_number)
        return E.Rule(*children, **attributes)

    def generate_state_machine(self, E, section, state_machine):
        """Build a ``StateMachine`` element with ``start`` and optional ``end`` attributes."""
        states = list(self.generate_states(E, section, state_machine))
        has_end_state = hasattr(state_machine, 'end_state')
        attributes = {
            'start': self.format_state_id(state_machine.start_state, section)
        }
        if has_end_state:
            attributes['end'] = self.format_state_id(state_machine.end_state,
                                                     section)
        return E.StateMachine(*states, **attributes)

    def generate_states(self, E, section, state_machine):
        """Yield one ``State`` element per state of the machine."""
        for current_state in state_machine:
            yield self.generate_state(E, section, current_state)

    def generate_state(self, E, section, state):
        """Build a ``State`` element holding its ids, final ids and transitions."""
        ids_element = E.Ids(*self.generate_state_ids(E, section, state.ids))
        final_ids_element = E.FinalIds(
            *self.generate_state_ids(E, section, state.final_ids))
        transitions_element = E.Transitions(
            *list(self.generate_transitions(E, section, state)))
        state_id = self.format_state_id(state, section)
        return E.State(ids_element, final_ids_element, transitions_element,
                       id=state_id)

    def generate_state_ids(self, E, section, ids):
        """Yield an ``Id`` element for every section rule selected by ``ids``."""
        for selected_rule in self.get_rules(ids, section):
            yield E.Id(self.format_rule_id(selected_rule, section))

    def generate_transitions(self, E, section, state):
        """Return the state's edge transitions followed by any epsilon transitions."""
        edge_transitions = list(
            self.generate_edge_transitions(E, section, state))
        epsilon_transitions = maybe_list(
            hasattr(state, 'epsilon_edges'),
            lambda: self.generate_epsilon_transitions(E, section, state))
        return edge_transitions + epsilon_transitions

    def generate_edge_transitions(self, E, section, state):
        """Yield a ``Transition`` element for each outgoing edge of the state."""
        for target, edge_ranges in state.edges.items():
            edge_children = list(
                self.generate_edge(E, section, state, target, edge_ranges))
            target_id = self.format_state_id(target, section)
            yield E.Transition(*edge_children, Destination=target_id)

    def generate_edge(self, E, section, state, destination, edge):
        """Yield a ``Codepoint`` for single-value ranges and a ``Range`` otherwise."""
        for (low, high) in edge:
            yield (E.Codepoint(str(low)) if low == high
                   else E.Range(start=str(low), end=str(high)))

    def generate_epsilon_transitions(self, E, section, state):
        """Yield a childless ``Transition`` element for each epsilon edge."""
        for target in state.epsilon_edges:
            target_id = self.format_state_id(target, section)
            yield E.Transition(Destination=target_id)
コード例 #17
0
ファイル: VariableFormatter.py プロジェクト: bihai/poodle-lex
class VariableFormatter(object):
    """Formats section, token and type names as ``::``-scoped identifiers.

    Identifier caching is delegated to a ``CachedFormatter``; its ``get_*``,
    ``add_*`` and ``clear_*`` attributes are re-exported on this object.
    """

    def __init__(self, plugin_options, reserved_ids):
        """Create the id caches, seed the fixed ids and re-export cache methods."""
        def section_id_formatter(section_id):
            # Drop the first dotted component, upper-case the remainder,
            # strip ':' and join with '_'.  None maps to ''.
            if section_id is None:
                return ''
            pieces = [piece.upper().replace(':', '')
                      for piece in section_id.split('.')]
            return '_'.join(pieces[1:])

        def token_id_formatter(id):
            if id is None:
                return None
            return id.upper()

        self.plugin_options = plugin_options
        self.cache = CachedFormatter(limit=64, reserved=reserved_ids)
        for cache_id, cache_formatter in (('section_id', section_id_formatter),
                                          ('token_id', token_id_formatter)):
            self.cache.add_cache(cache_id,
                                 cache_formatter,
                                 cache_name='section_and_tokens')
        # Pre-registered token ids.
        for token_id, token_text in ((None, 'ANONYMOUS'),
                                     ('endofstream', 'ENDOFSTREAM'),
                                     ('invalidcharacter', 'INVALIDCHARACTER')):
            self.cache.add_token_id(token_id, token_text)
        # Pre-registered section ids.
        for section_id, section_text in ((None, ''), ('::main::', 'MAIN')):
            self.cache.add_section_id(section_id, section_text)
        self.reserved_ids = reserved_ids
        # Expose the cache's get_/add_/clear_ attributes directly on self.
        exported_prefixes = ('get_', 'add_', 'clear_')
        for attr in dir(self.cache):
            if attr.startswith(exported_prefixes):
                setattr(self, attr, getattr(self.cache, attr))

    def get_class_name(self):
        """Return the ``class_name`` value held by the plugin options."""
        options = self.plugin_options
        return options.class_name

    def get_scoped(self, id, is_relative):
        """Return ``id`` as-is when relative, else prefixed with ``<class name>::``."""
        if not is_relative:
            return '{class_name}::{type}'.format(
                class_name=self.get_class_name(), type=id)
        return id

    def get_state_machine_method_name(self, section, is_relative):
        """Build the (optionally scoped) ``get_token[_<section>]`` method name."""
        suffix = self.cache.get_section_id(section).lower()
        suffix = '_' + suffix if suffix != '' else suffix
        return self.get_scoped('get_token{id}'.format(id=suffix), is_relative)

    def get_type(self, type_name, is_relative):
        """Map a logical type name to the type text used in generated code.

        ``'mode_stack'`` wraps the mode type in ``std::stack<...>``,
        ``'mode'`` and ``'token'`` go through ``get_scoped``, and ``'state'``
        is always the bare ``State``.  Any other name raises ``Exception``.
        """
        if type_name == 'mode_stack':
            inner = self.get_type('mode', is_relative)
            return "std::stack<{mode_type}>".format(mode_type=inner)
        for known_name, type_text in (('mode', 'Mode'), ('token', 'Token')):
            if type_name == known_name:
                return self.get_scoped(type_text, is_relative)
        if type_name == 'state':
            return 'State'
        raise Exception("unrecognized type '{id}'".format(id=type_name))

    def get_state_id_formatter(self, rules):
        """Create a ``StateIdFormatter`` for ``rules`` using the reserved ids."""
        reserved = self.reserved_ids
        return StateIdFormatter(rules, reserved)
コード例 #18
0
class VariableFormatter(object):
    """Formats section, token and type names as ``.``-scoped identifiers.

    Identifier caching is delegated to a ``CachedFormatter``; its ``get_*``,
    ``add_*`` and ``clear_*`` attributes are re-exported on this object.
    """

    def __init__(self, plugin_options, reserved_ids, poodle_namespace):
        """Create the id caches and re-export the cache's accessor methods.

        ``poodle_namespace`` is the namespace that non-custom types are
        qualified with when it differs from ``plugin_options.namespace``.
        """
        def section_id_formatter(section_id):
            # Drop the first dotted component, strip ':' from the rest and
            # concatenate.  None maps to ''.
            if section_id is None:
                return ''
            path = [part.replace(':', '') for part in section_id.split('.')]
            return ''.join(path[1:])

        def token_id_formatter(id):
            return id

        self.poodle_namespace = poodle_namespace
        self.plugin_options = plugin_options
        self.reserved_ids = reserved_ids
        self.cache = CachedFormatter(limit=64, reserved=reserved_ids)
        self.cache.add_cache('section_id', section_id_formatter, cache_name='section_and_tokens')
        self.cache.add_cache('token_id', token_id_formatter, cache_name='section_and_tokens')
        self.cache.add_section_id(None, '')
        self.cache.add_section_id('::main::', 'Main')
        # Expose the cache's get_/add_/clear_ attributes directly on self.
        for attr in dir(self.cache):
            if attr.startswith(('get_', 'add_', 'clear_')):
                setattr(self, attr, getattr(self.cache, attr))

    def get_class_name(self):
        """Return the ``class_name`` value held by the plugin options."""
        return self.plugin_options.class_name

    def get_default_encoding(self):
        """Return the relative, non-custom name ``Unicode.DefaultStringEncoding``."""
        return self.get_scoped('Unicode.DefaultStringEncoding', is_relative=True, is_custom_namespace=False)

    def get_mode_stack_class_name(self):
        """Return the class name with ``Mode`` appended."""
        return self.get_class_name() + "Mode"

    def get_namespace(self):
        """Return the ``namespace`` value held by the plugin options."""
        return self.plugin_options.namespace

    def get_scoped(self, id, is_relative, is_custom_namespace):
        """Qualify ``id`` with the appropriate namespace.

        A non-relative id is prefixed with the configured namespace.  A
        relative id is returned unchanged, except when it is not in the
        custom namespace and the configured namespace differs from
        ``poodle_namespace``; then it is prefixed with ``poodle_namespace``.
        """
        if is_relative:
            if not is_custom_namespace and self.plugin_options.namespace != self.poodle_namespace:
                # Use the ``id`` argument here: this branch previously
                # referenced an undefined name and raised NameError.
                return '{poodle}.{type}'.format(poodle=self.poodle_namespace, type=id)
            else:
                return id
        else:
            return '{namespace}.{type}'.format(namespace=self.plugin_options.namespace, type=id)

    def get_state_machine_method_name(self, section, is_relative):
        """Build the ``GetToken<section>`` method name, fully qualified if not relative."""
        method_name = 'GetToken{id}'.format(id=self.cache.get_section_id(section))
        if not is_relative:
            method_name = '{class_name}.{method_name}'.format(
                class_name=self.get_class_name(),
                method_name=method_name)
        return self.get_scoped(method_name, is_relative, is_custom_namespace=True)

    def get_type(self, type_name, is_relative):
        """Map a logical type name to the scoped type text used in generated code.

        ``'mode'`` and ``'token'`` are derived from the class name and treated
        as custom-namespace types; ``'text'``, ``'encoding'``, ``'character'``
        and ``'stream'`` are fixed names treated as non-custom.  Any other
        name raises ``Exception``.
        """
        if type_name == 'mode':
            is_custom_namespace = True
            type_text = "{class_name}.ModeId".format(class_name=self.get_mode_stack_class_name())
        elif type_name == 'token':
            is_custom_namespace = True
            type_text = "{class_name}Token".format(class_name=self.plugin_options.class_name)
        elif type_name == 'text':
            is_custom_namespace = False
            type_text = 'Unicode.Text'
        elif type_name == 'encoding':
            is_custom_namespace = False
            type_text = 'Unicode.StringEncoding'
        elif type_name == 'character':
            is_custom_namespace = False
            type_text = 'Unicode.Codepoint'
        elif type_name == 'stream':
            is_custom_namespace = False
            type_text = 'CharacterStream'
        else:
            raise Exception("unrecognized type '{id}'".format(id=type_name))
        return self.get_scoped(type_text, is_relative, is_custom_namespace)

    def get_state_id_formatter(self, rules):
        """Create a ``StateIdFormatter`` for ``rules`` using the reserved ids."""
        return StateIdFormatter(rules, self.reserved_ids)

    def get_unicode_char_name(self, codepoint):
        """Return ``<NAMESPACE>_UCS_<UNICODE NAME>`` (at most 64 chars) for a codepoint.

        Spaces and hyphens in the Unicode name become underscores.  When the
        codepoint has no Unicode name (``ValueError``), a ``&h``-prefixed hex
        literal is returned instead.
        """
        try:
            to_char = unichr  # Python 2
        except NameError:
            to_char = chr  # Python 3 has no unichr; chr covers all codepoints
        try:
            unicode_name = unicodedata.name(to_char(codepoint)).replace(' ', '_').replace('-', '_')
            return "{namespace}_UCS_{name}".format(
                namespace=self.get_namespace().upper(),
                name=unicode_name.upper())[:64]
        except ValueError:
            return "&h%02x" % codepoint