def __init__(self, plugin_options, reserved_ids):
    """Create the identifier cache and re-export its accessors on ``self``.

    Two formatters, ``section_id`` and ``token_id``, are registered with a
    ``CachedFormatter`` under the shared cache name ``section_and_tokens``.
    A few fixed id/name pairs are then added, and every attribute of the
    cache whose name starts with ``get_``, ``add_`` or ``clear_`` is copied
    onto this object.

    plugin_options -- stored as ``self.plugin_options``
    reserved_ids -- passed to the cache as ``reserved`` and stored as
        ``self.reserved_ids``
    """
    def section_id_formatter(section_id):
        # A missing section id formats as the empty string.
        if section_id is None:
            return ''
        # Everything after the first dotted component, upper-cased and
        # stripped of ':' characters, joined with underscores.
        components = section_id.split('.')
        return '_'.join(part.upper().replace(':', '') for part in components[1:])

    def token_id_formatter(token_id):
        return None if token_id is None else token_id.upper()

    self.plugin_options = plugin_options
    self.reserved_ids = reserved_ids

    self.cache = CachedFormatter(limit=64, reserved=reserved_ids)
    self.cache.add_cache('section_id', section_id_formatter, cache_name='section_and_tokens')
    self.cache.add_cache('token_id', token_id_formatter, cache_name='section_and_tokens')

    # Fixed token names first, then fixed section names.
    self.cache.add_token_id(None, 'ANONYMOUS')
    self.cache.add_token_id('endofstream', 'ENDOFSTREAM')
    self.cache.add_token_id('invalidcharacter', 'INVALIDCHARACTER')
    self.cache.add_section_id(None, '')
    self.cache.add_section_id('::main::', 'MAIN')

    # Expose the cache's accessor methods directly on this object.
    delegated_prefixes = ('get_', 'add_', 'clear_')
    for name in dir(self.cache):
        if name.startswith(delegated_prefixes):
            setattr(self, name, getattr(self.cache, name))
def __init__(self, rules_file, dependencies, plugin_options):
    """Store the plugin inputs and set up id formatting and vertex bookkeeping.

    rules_file -- stored as ``self.rules_file``
    dependencies -- stored as ``self.dependencies``
    plugin_options -- stored as ``self.plugin_options``; its ``file_name``
        becomes ``self.file_name``, falling back to "LexicalAnalyzer" when
        it is None
    """
    def formatter(raw_id, prefix=''):
        # A missing id is rendered as "anonymous".
        if raw_id is None:
            return prefix + 'anonymous'
        # Lower-case each dotted component and drop ':' characters; the
        # first component is discarded.
        tail = [part.lower().replace(':', '') for part in raw_id.split('.')][1:]
        if tail:
            return prefix + '_'.join(tail)
        # An id with a single component is rendered as "main".
        return prefix + 'main'

    def section_formatter(raw_id):
        return formatter(raw_id, 'cluster_')

    self.rules_file = rules_file
    self.dependencies = dependencies
    self.plugin_options = plugin_options

    configured_name = self.plugin_options.file_name
    self.file_name = "LexicalAnalyzer" if configured_name is None else configured_name

    # Both formatters share the cache named 'ids'.
    self.cache = CachedFormatter(limit=64, reserved=self.reserved_ids)
    self.cache.add_cache('id', formatter, 'ids')
    self.cache.add_cache('section', section_formatter, 'ids')

    # Vertex bookkeeping, filled in while the graph is generated.
    self.state_map = {}
    self.exit_vertices = {}
    self.entry_vertex = None
def __init__(self, rules_file, dependencies, plugin_options):
    """Store the plugin inputs and register the id formatters.

    rules_file -- stored as ``self.rules_file``
    dependencies -- stored as ``self.dependencies``
    plugin_options -- stored as ``self.plugin_options``; its ``file_name``
        becomes ``self.file_name``, falling back to "LexicalAnalyzer" when
        it is None
    """
    def format_id(value):
        # Falsy ids (None, empty string) are rendered as 'Anonymous'.
        return value or 'Anonymous'

    def second_item(pair):
        # The caches for rules and states are keyed by a pair; the
        # formatted text is the pair's second element.
        return pair[1]

    self.rules_file = rules_file
    self.dependencies = dependencies
    self.plugin_options = plugin_options

    configured_name = self.plugin_options.file_name
    self.file_name = "LexicalAnalyzer" if configured_name is None else configured_name

    # All three formatters share the cache named 'ids'.
    self.cache = CachedFormatter(limit=64, reserved=[])
    self.cache.add_cache('id', format_id, 'ids')
    self.cache.add_cache('rule', second_item, 'ids')
    self.cache.add_cache('state', second_item, 'ids')
def __init__(self, rules, reserved_ids):
    """Create a cache that names states after the rules they belong to.

    rules -- stored as ``self.rules``; iterated in order when naming a state
    reserved_ids -- passed to the cache as ``reserved``
    """
    def state_id_formatter(state):
        # Concatenate, in rule order, the name of each rule whose id is in
        # state.ids; unnamed rules contribute 'Anonymous'.
        return ''.join(
            'Anonymous' if rule.name is None else rule.name
            for rule in self.rules
            if rule.id in state.ids)

    self.rules = rules
    self.cache = CachedFormatter(limit=64, reserved=reserved_ids)
    self.cache.add_cache('state_id', state_id_formatter)

    # Expose the cache's accessor methods directly on this object.
    delegated_prefixes = ('get_', 'add_', 'clear_')
    for name in dir(self.cache):
        if name.startswith(delegated_prefixes):
            setattr(self, name, getattr(self.cache, name))
class StateIdFormatter(object):
    """Names states after the rules they belong to, via a ``CachedFormatter``."""

    def __init__(self, rules, reserved_ids):
        """Create the ``state_id`` cache and re-export its accessors.

        rules -- stored as ``self.rules``; iterated in order when naming a state
        reserved_ids -- passed to the cache as ``reserved``
        """
        def state_id_formatter(state):
            # Concatenate, in rule order, the name of each rule whose id is
            # in state.ids; unnamed rules contribute 'Anonymous'.
            return ''.join(
                'Anonymous' if rule.name is None else rule.name
                for rule in self.rules
                if rule.id in state.ids)

        self.rules = rules
        self.cache = CachedFormatter(limit=64, reserved=reserved_ids)
        self.cache.add_cache('state_id', state_id_formatter)

        # Expose the cache's accessor methods directly on this object.
        delegated_prefixes = ('get_', 'add_', 'clear_')
        for name in dir(self.cache):
            if name.startswith(delegated_prefixes):
                setattr(self, name, getattr(self.cache, name))
def __init__(self, rules_file, dependencies, plugin_options):
    """Store the plugin inputs and set up id formatting and vertex bookkeeping.

    rules_file -- stored as ``self.rules_file``
    dependencies -- stored as ``self.dependencies``
    plugin_options -- stored as ``self.plugin_options``; its ``file_name``
        becomes ``self.file_name``, falling back to "LexicalAnalyzer" when
        it is None
    """
    def formatter(raw_id, prefix=""):
        # A missing id is rendered as "anonymous".
        if raw_id is None:
            return prefix + "anonymous"
        # Lower-case each dotted component and drop ":" characters; the
        # first component is discarded.
        tail = [part.lower().replace(":", "") for part in raw_id.split(".")][1:]
        if tail:
            return prefix + "_".join(tail)
        # An id with a single component is rendered as "main".
        return prefix + "main"

    def section_formatter(raw_id):
        return formatter(raw_id, "cluster_")

    self.rules_file = rules_file
    self.dependencies = dependencies
    self.plugin_options = plugin_options

    configured_name = self.plugin_options.file_name
    self.file_name = "LexicalAnalyzer" if configured_name is None else configured_name

    # Both formatters share the cache named "ids".
    self.cache = CachedFormatter(limit=64, reserved=self.reserved_ids)
    self.cache.add_cache("id", formatter, "ids")
    self.cache.add_cache("section", section_formatter, "ids")

    # Vertex bookkeeping, filled in while the graph is generated.
    self.state_map = {}
    self.exit_vertices = {}
    self.entry_vertex = None
def __init__(self, rules, reserved_ids):
    """Create a cache that builds upper-case state names from rule names.

    rules -- stored as ``self.rules``; iterated in order when naming a state
    reserved_ids -- passed to the cache as ``reserved``
    """
    def state_id_formatter(state):
        # Each rule whose id is in state.ids contributes '_' + NAME (or
        # '_ANONYMOUS' when unnamed). The 'STATE_' prefix already ends in an
        # underscore, so the first name follows a double underscore.
        suffixes = (
            '_ANONYMOUS' if rule.name is None else '_' + rule.name.upper()
            for rule in self.rules
            if rule.id in state.ids)
        return 'STATE_' + ''.join(suffixes)

    self.rules = rules
    self.cache = CachedFormatter(limit=512, reserved=reserved_ids)
    self.cache.add_cache('state_id', state_id_formatter)

    # Expose the cache's accessor methods directly on this object.
    delegated_prefixes = ('get_', 'add_', 'clear_')
    for name in dir(self.cache):
        if name.startswith(delegated_prefixes):
            setattr(self, name, getattr(self.cache, name))
class StateIdFormatter(object):
    """Builds upper-case ``STATE_...`` names for states via a ``CachedFormatter``."""

    def __init__(self, rules, reserved_ids):
        """Create the ``state_id`` cache and re-export its accessors.

        rules -- stored as ``self.rules``; iterated in order when naming a state
        reserved_ids -- passed to the cache as ``reserved``
        """
        def state_id_formatter(state):
            # Each rule whose id is in state.ids contributes '_' + NAME (or
            # '_ANONYMOUS' when unnamed). The 'STATE_' prefix already ends in
            # an underscore, so the first name follows a double underscore.
            suffixes = (
                '_ANONYMOUS' if rule.name is None else '_' + rule.name.upper()
                for rule in self.rules
                if rule.id in state.ids)
            return 'STATE_' + ''.join(suffixes)

        self.rules = rules
        self.cache = CachedFormatter(limit=512, reserved=reserved_ids)
        self.cache.add_cache('state_id', state_id_formatter)

        # Expose the cache's accessor methods directly on this object.
        delegated_prefixes = ('get_', 'add_', 'clear_')
        for name in dir(self.cache):
            if name.startswith(delegated_prefixes):
                setattr(self, name, getattr(self.cache, name))
def __init__(self, plugin_options, reserved_ids, poodle_namespace):
    """Create the identifier cache and re-export its accessors on ``self``.

    Two formatters, ``section_id`` and ``token_id``, are registered with a
    ``CachedFormatter`` under the shared cache name ``section_and_tokens``.
    Two fixed section names are then added, and every attribute of the
    cache whose name starts with ``get_``, ``add_`` or ``clear_`` is copied
    onto this object.

    plugin_options -- stored as ``self.plugin_options``
    reserved_ids -- passed to the cache as ``reserved`` and stored as
        ``self.reserved_ids``
    poodle_namespace -- stored as ``self.poodle_namespace``
    """
    def section_id_formatter(section_id):
        # A missing section id formats as the empty string.
        if section_id is None:
            return ''
        # Everything after the first dotted component, stripped of ':'
        # characters and concatenated without a separator (case is kept).
        components = section_id.split('.')
        return ''.join(part.replace(':', '') for part in components[1:])

    def token_id_formatter(token_id):
        # Token ids are used verbatim.
        return token_id

    self.poodle_namespace = poodle_namespace
    self.plugin_options = plugin_options
    self.reserved_ids = reserved_ids

    self.cache = CachedFormatter(limit=64, reserved=reserved_ids)
    self.cache.add_cache('section_id', section_id_formatter, cache_name='section_and_tokens')
    self.cache.add_cache('token_id', token_id_formatter, cache_name='section_and_tokens')
    self.cache.add_section_id(None, '')
    self.cache.add_section_id('::main::', 'Main')

    # Expose the cache's accessor methods directly on this object.
    delegated_prefixes = ('get_', 'add_', 'clear_')
    for name in dir(self.cache):
        if name.startswith(delegated_prefixes):
            setattr(self, name, getattr(self.cache, name))
def __init__(self, plugin_options, reserved_ids):
    """Create the identifier cache and re-export its accessors on ``self``.

    Two formatters, ``section_id`` and ``token_id``, are registered with a
    ``CachedFormatter`` under the shared cache name ``section_and_tokens``.
    A few fixed id/name pairs are then added, and every attribute of the
    cache whose name starts with ``get_``, ``add_`` or ``clear_`` is copied
    onto this object.

    plugin_options -- stored as ``self.plugin_options``
    reserved_ids -- passed to the cache as ``reserved`` and stored as
        ``self.reserved_ids``
    """
    def section_id_formatter(section_id):
        # A missing section id formats as the empty string.
        if section_id is None:
            return ''
        # Everything after the first dotted component, upper-cased and
        # stripped of ':' characters, joined with underscores.
        components = section_id.split('.')
        return '_'.join(part.upper().replace(':', '') for part in components[1:])

    def token_id_formatter(token_id):
        return None if token_id is None else token_id.upper()

    self.plugin_options = plugin_options
    self.reserved_ids = reserved_ids

    self.cache = CachedFormatter(limit=64, reserved=reserved_ids)
    self.cache.add_cache('section_id', section_id_formatter, cache_name='section_and_tokens')
    self.cache.add_cache('token_id', token_id_formatter, cache_name='section_and_tokens')

    # Fixed token names first, then fixed section names.
    self.cache.add_token_id(None, 'ANONYMOUS')
    self.cache.add_token_id('endofstream', 'ENDOFSTREAM')
    self.cache.add_token_id('invalidcharacter', 'INVALIDCHARACTER')
    self.cache.add_section_id(None, '')
    self.cache.add_section_id('::main::', 'MAIN')

    # Expose the cache's accessor methods directly on this object.
    delegated_prefixes = ('get_', 'add_', 'clear_')
    for name in dir(self.cache):
        if name.startswith(delegated_prefixes):
            setattr(self, name, getattr(self.cache, name))
class DotPlugin(PluginTemplate):
    """Plugin that writes the lexical analyzer's state machines as a DOT graph.

    Each section of the rules file is emitted as a ``subgraph`` block whose
    vertices are the states of that section's state machine; edges are
    emitted afterwards. Vertices are identified by integers handed out by
    ``allocate_vertex``.
    """

    # DOT language keywords, passed to the formatter cache as reserved ids.
    reserved_ids = ['strict', 'graph', 'digraph', 'node', 'edge', 'subgraph']

    def __init__(self, rules_file, dependencies, plugin_options):
        """Store the plugin inputs and set up id formatting and vertex bookkeeping.

        rules_file -- stored as ``self.rules_file``
        dependencies -- stored as ``self.dependencies``
        plugin_options -- stored as ``self.plugin_options``; its ``file_name``
            becomes ``self.file_name``, falling back to "LexicalAnalyzer"
            when it is None
        """
        def formatter(id, prefix=''):
            if id is None:
                return prefix + 'anonymous'
            # Lower-case each dotted component and drop ':' characters; the
            # first component is discarded.
            tail = [part.lower().replace(':', '') for part in id.split('.')][1:]
            if tail:
                return prefix + '_'.join(tail)
            return prefix + 'main'

        def section_formatter(id):
            return formatter(id, 'cluster_')

        self.rules_file = rules_file
        self.dependencies = dependencies
        self.plugin_options = plugin_options
        self.file_name = self.plugin_options.file_name
        if self.file_name is None:
            self.file_name = "LexicalAnalyzer"
        self.cache = CachedFormatter(limit=64, reserved=self.reserved_ids)
        self.cache.add_cache('id', formatter, 'ids')
        self.cache.add_cache('section', section_formatter, 'ids')
        self.state_map = {}        # state (or anonymous vertex number) -> vertex number
        self.exit_vertices = {}    # formatted section id -> exit vertex number or None
        self.entry_vertex = None   # vertex number of the "start" marker, once allocated

    # PluginTemplate interface
    def process(self, token):
        """Generate the graph body when the "CONTENT" token is encountered."""
        if token.token == "CONTENT":
            self.generate_content(token)

    def get_output_directories(self):
        """Return the output directories to create (none)."""
        return []

    def get_files_to_copy(self):
        """Return the files to copy verbatim (none)."""
        return []

    def get_files_to_generate(self):
        """Return (template name, output name) pairs for generated files."""
        return [("LexicalAnalyzer.dot", self.file_name + ".dot")]

    # Private methods
    def generate_content(self, token):
        """Write all section subgraphs, exit markers and edges to ``token.stream``."""
        code = CodeEmitter(token.stream)
        code.indent()

        # Generate the state machine factory
        self.get_state_machine = lambda section: section.dfa
        if self.plugin_options.form == self.plugin_options.NFA_IR:
            nfas = {}
            for section in self.rules_file.sections.values():
                nfas[section] = section.rules[0].nfa.alternate(rule.nfa for rule in section.rules)
            self.get_state_machine = lambda section: nfas[section]

        # Generate vertices
        for id, section in self.rules_file.sections.items():
            formatted_id = self.cache.get_section(id)
            with code.block("subgraph {0} {{".format(formatted_id), "}"):
                code.line('label="{0}"'.format(self.get_section_label(section, id)))
                self.generate_vertices(code, formatted_id, section)
        for vertex in self.exit_vertices.values():
            # Sections without an exit rule are recorded with None; emitting
            # those would create a bogus vertex literally named "None".
            if vertex is not None:
                self.draw_vertex(code, id=vertex, label="exit section", is_shapeless=True)
        code.line()

        # Generate edges
        for id, section in self.rules_file.sections.items():
            formatted_id = self.cache.get_section(id)
            self.generate_edges(code, formatted_id, section)

    @staticmethod
    def get_section_label(section, id):
        """Return the subgraph label: the id plus any section attributes.

        Both ``inherits`` and ``exits`` are listed when both are set.
        """
        attributes = []
        if section.inherits:
            attributes.append('inherits')
        if section.exits:
            attributes.append('exits')
        if attributes:
            return "{0} ({1})".format(id, ','.join(attributes))
        return id

    @staticmethod
    def get_rule_ids(rules, ids):
        """Yield the display name of each rule in ``rules`` whose id is in ``ids``."""
        for rule in rules:
            if rule.id in ids:
                yield rule.name if rule.name is not None else 'Anonymous'

    @staticmethod
    def get_matching_rule(rules, ids):
        """Return the first rule in ``rules`` whose id is in ``ids``, or None."""
        return next((rule for rule in rules if rule.id in ids), None)

    def allocate_vertex(self, state=None):
        """Return the vertex number for ``state``, allocating one if needed.

        With ``state=None`` a fresh anonymous vertex is always allocated; it
        is keyed in ``self.state_map`` by its own number.
        """
        value = self.state_map.get(state)
        if value is None:
            # Each allocation adds exactly one entry, so the current size is
            # always an unused number.
            value = len(self.state_map)
            key = value if state is None else state
            self.state_map[key] = value
        return value

    @staticmethod
    def draw_vertex(code, id, label=None, ids=None, final_ids=None, is_final=False, is_shapeless=False):
        """Emit one vertex declaration.

        code -- emitter whose ``line`` method receives the text
        id -- vertex identifier; also used as the label when ``label`` is None
        ids / final_ids -- pre-joined rule-name strings shown in the label
        is_final -- final states are drawn as octagons
        is_shapeless -- draw a label-only marker (``shape=none``)

        NOTE(review): a vertex that is neither shapeless nor final and has
        ``ids`` of None produces no output; kept as in the original.
        """
        text_label = id if label is None else label
        if is_shapeless:
            code.line('{i} [label="{label}", shape=none];'.format(i=id, label=text_label))
        elif ids is not None and not is_final:
            code.line('{i} [label="({ids})"];'.format(i=id, ids=ids))
        elif final_ids is None and is_final:
            code.line('{i} [label="{label}", shape=octagon];'.format(i=id, label=text_label))
        elif final_ids is not None and is_final:
            code.line('{i} [label="({ids})\\n({final_ids})", shape=octagon];'.format(
                i=id, ids=ids, final_ids=final_ids))

    @staticmethod
    def draw_edge(code, start, end, label=None):
        """Emit an edge from ``start`` to ``end``, with an optional label."""
        if label is None:
            code.line('{0} -> {1}'.format(start, end))
        else:
            code.line('{0} -> {1} [label="{2}"]'.format(start, end, label))

    @staticmethod
    def format_codepoint(codepoint):
        """Return label text for a codepoint.

        Codepoints 32..126 are shown as a quoted character (double quote and
        backslash escaped for use inside a DOT string); anything else is
        shown in hexadecimal.
        """
        if codepoint == ord('"'):
            return "'\\\"'"
        # Chained comparison instead of membership in xrange(): constant
        # time and not tied to a Python-2-only builtin.
        if 32 <= codepoint < 127:
            return "'%s'" % chr(codepoint).replace("\\", "\\\\")
        return "0x%x" % codepoint

    @staticmethod
    def format_case(range):
        """Return label text for a (first, last) codepoint pair."""
        if range[0] == range[1]:
            return "%s" % DotPlugin.format_codepoint(range[0])
        return "%s-%s" % tuple(DotPlugin.format_codepoint(i) for i in range)

    def allocate_vertices(self, section, id, state_machine):
        """Allocate vertex numbers for a section's states and its markers.

        The entry marker is allocated for the section formatted as
        "cluster_main"; an exit marker is allocated when any rule of the
        section has an 'exit' section action, otherwise None is recorded.
        """
        for state in state_machine:
            self.allocate_vertex(state)
        if id == "cluster_main":
            self.entry_vertex = self.allocate_vertex()
        has_exit_rule = any(
            rule.section_action is not None and rule.section_action[0] == 'exit'
            for rule in section.rules)
        self.exit_vertices[id] = self.allocate_vertex() if has_exit_rule else None

    def generate_vertices(self, code, id, section):
        """Emit the vertex declarations for one section."""
        state_machine = self.get_state_machine(section)
        self.allocate_vertices(section, id, state_machine)
        for state in state_machine:
            i = self.state_map[state]
            ids = None
            if any(state.ids):
                ids = ', '.join(self.get_rule_ids(section.rules, state.ids))
            final_ids = None
            if any(state.final_ids):
                final_ids = ', '.join(self.get_rule_ids(section.rules, state.final_ids))
            self.draw_vertex(code, id=i, ids=ids, final_ids=final_ids, is_final=any(state.final_ids))
        # Declare the start marker only in the main cluster, matching the
        # condition generate_edges uses for the entry edge; otherwise it is
        # re-declared in every section processed after the main one.
        if id == 'cluster_main' and self.entry_vertex is not None:
            self.draw_vertex(code, id=self.entry_vertex, label="start", is_shapeless=True)

    def generate_edges(self, code, id, section):
        """Emit the edges for one section, including section-action edges."""
        state_machine = self.get_state_machine(section)
        if self.entry_vertex is not None and id == 'cluster_main':
            self.draw_edge(code, self.entry_vertex, self.state_map[state_machine.start_state])
        for state in state_machine:
            source = self.state_map[state]
            for destination, edge in state.edges.items():
                edge_label = ", ".join([self.format_case(i) for i in edge])
                # Break very long labels across lines inside the DOT string.
                edge_label = '\\n'.join(textwrap.wrap(edge_label, 256))
                self.draw_edge(code, source, self.state_map[destination], edge_label)
            if hasattr(state, 'epsilon_edges'):
                for destination in state.epsilon_edges:
                    self.draw_edge(code, source, self.state_map[destination], 'ε')
            if any(state.final_ids):
                rule = self.get_matching_rule(section.rules, state.final_ids)
                if rule is not None and rule.section_action is not None and rule.section_action[0] is not None:
                    label = rule.section_action[0]
                    destination = None
                    if label == 'exit':
                        destination = self.exit_vertices[id]
                    elif label in ('enter', 'switch'):
                        destination_section = self.rules_file.sections[rule.section_action[1]]
                        start_state = self.get_state_machine(destination_section).start_state
                        # Returns the existing vertex or allocates a new one.
                        destination = self.allocate_vertex(start_state)
                    if destination is not None:
                        self.draw_edge(code, source, destination, label)
class XmlPlugin(PluginTemplate):
    """Plugin that writes the rules file's sections, rules and state machines as XML.

    Elements are built with the factory obtained from
    ``dependencies["ElementTreeFactory"]`` and written with ``pretty_print``.
    """

    def __init__(self, rules_file, dependencies, plugin_options):
        """Store the plugin inputs and register the id formatters.

        rules_file -- stored as ``self.rules_file``
        dependencies -- stored as ``self.dependencies``
        plugin_options -- stored as ``self.plugin_options``; its ``file_name``
            becomes ``self.file_name``, falling back to "LexicalAnalyzer"
            when it is None
        """
        def format_id(value):
            return value or 'Anonymous'

        def second_item(pair):
            # Rule and state caches are keyed by a pair whose second
            # element is the text to format.
            return pair[1]

        self.rules_file = rules_file
        self.dependencies = dependencies
        self.plugin_options = plugin_options

        configured_name = self.plugin_options.file_name
        self.file_name = "LexicalAnalyzer" if configured_name is None else configured_name

        self.cache = CachedFormatter(limit=64, reserved=[])
        self.cache.add_cache('id', format_id, 'ids')
        self.cache.add_cache('rule', second_item, 'ids')
        self.cache.add_cache('state', second_item, 'ids')

    # PluginTemplate interface
    def process(self, token):
        """Generate the document when the "ROOT" token is encountered."""
        if token.token != "ROOT":
            return
        self.generate_root(token)

    def get_output_directories(self):
        """Return the output directories to create (none)."""
        return []

    def get_files_to_copy(self):
        """Return the files to copy verbatim (none)."""
        return []

    def get_files_to_generate(self):
        """Return (template name, output name) pairs for generated files."""
        output_name = self.file_name + ".xml"
        return [("LexicalAnalyzer.xml", output_name)]

    # Private utility methods
    def get_form(self):
        """Return "NFA" when the NFA form is selected, otherwise "DFA"."""
        if self.plugin_options.form == self.plugin_options.NFA_IR:
            return "NFA"
        return "DFA"

    @staticmethod
    def get_rules(ids, section):
        """Return a generator over the section's rules whose id is in ``ids``."""
        return (candidate for candidate in section.rules if candidate.id in ids)

    @staticmethod
    def get_rule_name(rule):
        """Return the rule's name, or 'Anonymous' when it has none."""
        return rule.name or 'Anonymous'

    @staticmethod
    def get_rule_names(ids, section):
        """Return a generator over the names of the section's rules in ``ids``."""
        matching = XmlPlugin.get_rules(ids, section)
        return (XmlPlugin.get_rule_name(candidate) for candidate in matching)

    def format_state_id(self, state, section):
        """Return the cached id for a state, named after its rules (or 'State')."""
        joined_names = ''.join(self.get_rule_names(state.ids, section))
        return self.cache.get_state((state, joined_names or 'State'))

    def format_rule_id(self, rule, section):
        """Return the cached id for a rule, named after the rule (or 'Rule')."""
        display_name = self.get_rule_name(rule) or 'Rule'
        return self.cache.get_rule((rule.id, display_name))

    # Generation methods
    def generate_root(self, token):
        """Build the whole document and pretty-print it to ``token.stream``."""
        E = self.dependencies["ElementTreeFactory"]._E()
        form_element = E.Form(self.get_form())
        sections_element = E.Sections(*list(self.generate_sections(E)))
        root = E.LexicalAnalyzer(
            form_element,
            sections_element,
            xmlns="https://github.com/parkertomatoes/poodle-lex")
        pretty_print(root, token.stream)

    def generate_sections(self, E):
        """Yield one Section element per section of the rules file."""
        for section_id, section in self.rules_file.sections.items():
            yield self.generate_section(E, section_id, section)

    def generate_section(self, E, id, section):
        """Build a Section element: its rules, then its DFA when present."""
        # Children are built before the attributes so that the formatter
        # cache is consulted in the same order as before.
        children = [E.Rules(*list(self.generate_rules(E, section)))]
        children = children + maybe(
            hasattr(section, 'dfa'),
            lambda: self.generate_state_machine(E, section, section.dfa))
        attributes = {
            'id': self.cache.get_id(id),
            'inherits': str(section.inherits).lower(),
            'exits': str(section.exits).lower(),
        }
        return E.Section(*children, **attributes)

    def generate_rules(self, E, section):
        """Yield one Rule element per rule of the section."""
        for rule in section.rules:
            yield self.generate_rule(E, section, rule)

    def generate_rule(self, E, section, rule):
        """Build a Rule element with optional actions, section action and NFA."""
        def build_actions():
            return E.Actions(*(E.Action(action) for action in rule.action))

        def build_section_action():
            parts = [E.Action(rule.section_action[0])]
            parts = parts + maybe(
                rule.section_action[1] is not None,
                lambda: E.Section(rule.section_action[1]))
            return E.SectionAction(*list(parts))

        def build_nfa():
            return self.generate_state_machine(E, section, rule.nfa)

        has_section_action = (
            rule.section_action is not None and rule.section_action[0] is not None)

        # Children first, attributes second: both touch the formatter cache.
        children = (
            maybe(any(rule.action), build_actions)
            + maybe(has_section_action, build_section_action)
            + maybe(hasattr(rule, 'nfa'), build_nfa))

        attributes = {
            'name': self.get_rule_name(rule),
            'id': self.format_rule_id(rule, section),
        }
        if hasattr(rule, 'line_number'):
            attributes['line_number'] = str(rule.line_number)
        return E.Rule(*children, **attributes)

    def generate_state_machine(self, E, section, state_machine):
        """Build a StateMachine element with its states and start/end ids."""
        states = list(self.generate_states(E, section, state_machine))
        attributes = {'start': self.format_state_id(state_machine.start_state, section)}
        # Only some state machines carry an end state.
        if hasattr(state_machine, 'end_state'):
            attributes['end'] = self.format_state_id(state_machine.end_state, section)
        return E.StateMachine(*states, **attributes)

    def generate_states(self, E, section, state_machine):
        """Yield one State element per state of the machine."""
        for state in state_machine:
            yield self.generate_state(E, section, state)

    def generate_state(self, E, section, state):
        """Build a State element: rule ids, final rule ids and transitions."""
        ids_element = E.Ids(*self.generate_state_ids(E, section, state.ids))
        final_ids_element = E.FinalIds(*self.generate_state_ids(E, section, state.final_ids))
        transitions_element = E.Transitions(*list(self.generate_transitions(E, section, state)))
        state_id = self.format_state_id(state, section)
        return E.State(ids_element, final_ids_element, transitions_element, id=state_id)

    def generate_state_ids(self, E, section, ids):
        """Yield an Id element for each rule of the section whose id is in ``ids``."""
        for rule in self.get_rules(ids, section):
            yield E.Id(self.format_rule_id(rule, section))

    def generate_transitions(self, E, section, state):
        """Return the state's edge transitions followed by any epsilon transitions."""
        edge_transitions = list(self.generate_edge_transitions(E, section, state))
        epsilon_transitions = maybe_list(
            hasattr(state, 'epsilon_edges'),
            lambda: self.generate_epsilon_transitions(E, section, state))
        return edge_transitions + epsilon_transitions

    def generate_edge_transitions(self, E, section, state):
        """Yield a Transition element for each outgoing edge of the state."""
        for destination, edge in state.edges.items():
            edge_elements = list(self.generate_edge(E, section, state, destination, edge))
            yield E.Transition(
                *edge_elements,
                Destination=self.format_state_id(destination, section))

    def generate_edge(self, E, section, state, destination, edge):
        """Yield a Codepoint or Range element for each (min, max) pair of the edge."""
        for low, high in edge:
            if low != high:
                yield E.Range(start=str(low), end=str(high))
            else:
                yield E.Codepoint(str(low))

    def generate_epsilon_transitions(self, E, section, state):
        """Yield an empty Transition element for each epsilon edge of the state."""
        for destination in state.epsilon_edges:
            yield E.Transition(Destination=self.format_state_id(destination, section))
class VariableFormatter(object):
    """Formats section, token, method and type names for generated code.

    Section and token ids go through a ``CachedFormatter`` whose accessor
    methods are re-exported on instances of this class.
    """

    def __init__(self, plugin_options, reserved_ids):
        """Create the identifier cache and re-export its accessors on ``self``.

        plugin_options -- stored as ``self.plugin_options``
        reserved_ids -- passed to the cache as ``reserved`` and stored as
            ``self.reserved_ids``
        """
        def section_id_formatter(section_id):
            # A missing section id formats as the empty string.
            if section_id is None:
                return ''
            # Everything after the first dotted component, upper-cased and
            # stripped of ':' characters, joined with underscores.
            components = section_id.split('.')
            return '_'.join(part.upper().replace(':', '') for part in components[1:])

        def token_id_formatter(token_id):
            return None if token_id is None else token_id.upper()

        self.plugin_options = plugin_options
        self.reserved_ids = reserved_ids

        self.cache = CachedFormatter(limit=64, reserved=reserved_ids)
        self.cache.add_cache('section_id', section_id_formatter, cache_name='section_and_tokens')
        self.cache.add_cache('token_id', token_id_formatter, cache_name='section_and_tokens')

        # Fixed token names first, then fixed section names.
        self.cache.add_token_id(None, 'ANONYMOUS')
        self.cache.add_token_id('endofstream', 'ENDOFSTREAM')
        self.cache.add_token_id('invalidcharacter', 'INVALIDCHARACTER')
        self.cache.add_section_id(None, '')
        self.cache.add_section_id('::main::', 'MAIN')

        # Expose the cache's accessor methods directly on this object.
        delegated_prefixes = ('get_', 'add_', 'clear_')
        for name in dir(self.cache):
            if name.startswith(delegated_prefixes):
                setattr(self, name, getattr(self.cache, name))

    def get_class_name(self):
        """Return the class name configured in the plugin options."""
        return self.plugin_options.class_name

    def get_scoped(self, id, is_relative):
        """Return ``id`` unchanged when relative, else qualified with the class name."""
        if not is_relative:
            return '{class_name}::{type}'.format(class_name=self.get_class_name(), type=id)
        return id

    def get_state_machine_method_name(self, section, is_relative):
        """Return the ``get_token`` method name for a section, optionally scoped.

        The lower-cased section id is appended after an underscore; an empty
        section id yields plain ``get_token``.
        """
        lowered = self.cache.get_section_id(section).lower()
        suffix = '_' + lowered if lowered != '' else lowered
        return self.get_scoped('get_token{id}'.format(id=suffix), is_relative)

    def get_type(self, type_name, is_relative):
        """Return the type text for 'mode_stack', 'mode', 'token' or 'state'.

        'state' is always returned unscoped; 'mode_stack' wraps the 'mode'
        type in ``std::stack<...>``. Any other name raises ``Exception``.
        """
        if type_name == 'mode_stack':
            mode_type = self.get_type('mode', is_relative)
            return "std::stack<{mode_type}>".format(mode_type=mode_type)
        if type_name == 'mode':
            return self.get_scoped('Mode', is_relative)
        if type_name == 'token':
            return self.get_scoped('Token', is_relative)
        if type_name == 'state':
            return 'State'
        raise Exception("unrecognized type '{id}'".format(id=type_name))

    def get_state_id_formatter(self, rules):
        """Return a ``StateIdFormatter`` for ``rules`` sharing this object's reserved ids."""
        return StateIdFormatter(rules, self.reserved_ids)
class DotPlugin(PluginTemplate):
    """Plugin that writes the lexical analyzer's state machines as a DOT graph.

    Each section of the rules file is emitted as a ``subgraph`` block whose
    vertices are the states of that section's state machine; edges are
    emitted afterwards. Vertices are identified by integers handed out by
    ``allocate_vertex``.
    """

    # DOT language keywords, passed to the formatter cache as reserved ids.
    reserved_ids = ["strict", "graph", "digraph", "node", "edge", "subgraph"]

    def __init__(self, rules_file, dependencies, plugin_options):
        """Store the plugin inputs and set up id formatting and vertex bookkeeping.

        rules_file -- stored as ``self.rules_file``
        dependencies -- stored as ``self.dependencies``
        plugin_options -- stored as ``self.plugin_options``; its ``file_name``
            becomes ``self.file_name``, falling back to "LexicalAnalyzer"
            when it is None
        """
        def formatter(id, prefix=""):
            if id is None:
                return prefix + "anonymous"
            # Lower-case each dotted component and drop ":" characters; the
            # first component is discarded.
            tail = [part.lower().replace(":", "") for part in id.split(".")][1:]
            if tail:
                return prefix + "_".join(tail)
            return prefix + "main"

        def section_formatter(id):
            return formatter(id, "cluster_")

        self.rules_file = rules_file
        self.dependencies = dependencies
        self.plugin_options = plugin_options
        self.file_name = self.plugin_options.file_name
        if self.file_name is None:
            self.file_name = "LexicalAnalyzer"
        self.cache = CachedFormatter(limit=64, reserved=self.reserved_ids)
        self.cache.add_cache("id", formatter, "ids")
        self.cache.add_cache("section", section_formatter, "ids")
        self.state_map = {}        # state (or anonymous vertex number) -> vertex number
        self.exit_vertices = {}    # formatted section id -> exit vertex number or None
        self.entry_vertex = None   # vertex number of the "start" marker, once allocated

    # PluginTemplate interface
    def process(self, token):
        """Generate the graph body when the "CONTENT" token is encountered."""
        if token.token == "CONTENT":
            self.generate_content(token)

    def get_output_directories(self):
        """Return the output directories to create (none)."""
        return []

    def get_files_to_copy(self):
        """Return the files to copy verbatim (none)."""
        return []

    def get_files_to_generate(self):
        """Return (template name, output name) pairs for generated files."""
        return [("LexicalAnalyzer.dot", self.file_name + ".dot")]

    # Private methods
    def generate_content(self, token):
        """Write all section subgraphs, exit markers and edges to ``token.stream``."""
        code = CodeEmitter(token.stream)
        code.indent()

        # Generate the state machine factory
        self.get_state_machine = lambda section: section.dfa
        if self.plugin_options.form == self.plugin_options.NFA_IR:
            nfas = {}
            for section in self.rules_file.sections.values():
                nfas[section] = section.rules[0].nfa.alternate(rule.nfa for rule in section.rules)
            self.get_state_machine = lambda section: nfas[section]

        # Generate vertices
        for id, section in self.rules_file.sections.items():
            formatted_id = self.cache.get_section(id)
            with code.block("subgraph {0} {{".format(formatted_id), "}"):
                code.line('label="{0}"'.format(self.get_section_label(section, id)))
                self.generate_vertices(code, formatted_id, section)
        for vertex in self.exit_vertices.values():
            # Sections without an exit rule are recorded with None; emitting
            # those would create a bogus vertex literally named "None".
            if vertex is not None:
                self.draw_vertex(code, id=vertex, label="exit section", is_shapeless=True)
        code.line()

        # Generate edges
        for id, section in self.rules_file.sections.items():
            formatted_id = self.cache.get_section(id)
            self.generate_edges(code, formatted_id, section)

    @staticmethod
    def get_section_label(section, id):
        """Return the subgraph label: the id plus any section attributes.

        Both ``inherits`` and ``exits`` are listed when both are set.
        """
        attributes = []
        if section.inherits:
            attributes.append("inherits")
        if section.exits:
            attributes.append("exits")
        if attributes:
            return "{0} ({1})".format(id, ",".join(attributes))
        return id

    @staticmethod
    def get_rule_ids(rules, ids):
        """Yield the display name of each rule in ``rules`` whose id is in ``ids``."""
        for rule in rules:
            if rule.id in ids:
                yield rule.name if rule.name is not None else "Anonymous"

    @staticmethod
    def get_matching_rule(rules, ids):
        """Return the first rule in ``rules`` whose id is in ``ids``, or None."""
        return next((rule for rule in rules if rule.id in ids), None)

    def allocate_vertex(self, state=None):
        """Return the vertex number for ``state``, allocating one if needed.

        With ``state=None`` a fresh anonymous vertex is always allocated; it
        is keyed in ``self.state_map`` by its own number.
        """
        value = self.state_map.get(state)
        if value is None:
            # Each allocation adds exactly one entry, so the current size is
            # always an unused number.
            value = len(self.state_map)
            key = value if state is None else state
            self.state_map[key] = value
        return value

    @staticmethod
    def draw_vertex(code, id, label=None, ids=None, final_ids=None, is_final=False, is_shapeless=False):
        """Emit one vertex declaration.

        code -- emitter whose ``line`` method receives the text
        id -- vertex identifier; also used as the label when ``label`` is None
        ids / final_ids -- pre-joined rule-name strings shown in the label
        is_final -- final states are drawn as octagons
        is_shapeless -- draw a label-only marker (``shape=none``)

        NOTE(review): a vertex that is neither shapeless nor final and has
        ``ids`` of None produces no output; kept as in the original.
        """
        text_label = id if label is None else label
        if is_shapeless:
            code.line('{i} [label="{label}", shape=none];'.format(i=id, label=text_label))
        elif ids is not None and not is_final:
            code.line('{i} [label="({ids})"];'.format(i=id, ids=ids))
        elif final_ids is None and is_final:
            code.line('{i} [label="{label}", shape=octagon];'.format(i=id, label=text_label))
        elif final_ids is not None and is_final:
            code.line(
                '{i} [label="({ids})\\n({final_ids})", shape=octagon];'.format(
                    i=id, ids=ids, final_ids=final_ids
                )
            )

    @staticmethod
    def draw_edge(code, start, end, label=None):
        """Emit an edge from ``start`` to ``end``, with an optional label."""
        if label is None:
            code.line("{0} -> {1}".format(start, end))
        else:
            code.line('{0} -> {1} [label="{2}"]'.format(start, end, label))

    @staticmethod
    def format_codepoint(codepoint):
        """Return label text for a codepoint.

        Codepoints 32..126 are shown as a quoted character (double quote and
        backslash escaped for use inside a DOT string); anything else is
        shown in hexadecimal.
        """
        if codepoint == ord('"'):
            return "'\\\"'"
        # Chained comparison instead of membership in xrange(): constant
        # time and not tied to a Python-2-only builtin.
        if 32 <= codepoint < 127:
            return "'%s'" % chr(codepoint).replace("\\", "\\\\")
        return "0x%x" % codepoint

    @staticmethod
    def format_case(range):
        """Return label text for a (first, last) codepoint pair."""
        if range[0] == range[1]:
            return "%s" % DotPlugin.format_codepoint(range[0])
        return "%s-%s" % tuple(DotPlugin.format_codepoint(i) for i in range)

    def allocate_vertices(self, section, id, state_machine):
        """Allocate vertex numbers for a section's states and its markers.

        The entry marker is allocated for the section formatted as
        "cluster_main"; an exit marker is allocated when any rule of the
        section has an "exit" section action, otherwise None is recorded.
        """
        for state in state_machine:
            self.allocate_vertex(state)
        if id == "cluster_main":
            self.entry_vertex = self.allocate_vertex()
        has_exit_rule = any(
            rule.section_action is not None and rule.section_action[0] == "exit"
            for rule in section.rules)
        self.exit_vertices[id] = self.allocate_vertex() if has_exit_rule else None

    def generate_vertices(self, code, id, section):
        """Emit the vertex declarations for one section."""
        state_machine = self.get_state_machine(section)
        self.allocate_vertices(section, id, state_machine)
        for state in state_machine:
            i = self.state_map[state]
            ids = None
            if any(state.ids):
                ids = ", ".join(self.get_rule_ids(section.rules, state.ids))
            final_ids = None
            if any(state.final_ids):
                final_ids = ", ".join(self.get_rule_ids(section.rules, state.final_ids))
            self.draw_vertex(code, id=i, ids=ids, final_ids=final_ids, is_final=any(state.final_ids))
        # Declare the start marker only in the main cluster, matching the
        # condition generate_edges uses for the entry edge; otherwise it is
        # re-declared in every section processed after the main one.
        if id == "cluster_main" and self.entry_vertex is not None:
            self.draw_vertex(code, id=self.entry_vertex, label="start", is_shapeless=True)

    def generate_edges(self, code, id, section):
        """Emit the edges for one section, including section-action edges."""
        state_machine = self.get_state_machine(section)
        if self.entry_vertex is not None and id == "cluster_main":
            self.draw_edge(code, self.entry_vertex, self.state_map[state_machine.start_state])
        for state in state_machine:
            source = self.state_map[state]
            for destination, edge in state.edges.items():
                edge_label = ", ".join([self.format_case(i) for i in edge])
                # Break very long labels across lines inside the DOT string.
                edge_label = "\\n".join(textwrap.wrap(edge_label, 256))
                self.draw_edge(code, source, self.state_map[destination], edge_label)
            if hasattr(state, "epsilon_edges"):
                for destination in state.epsilon_edges:
                    self.draw_edge(code, source, self.state_map[destination], "ε")
            if any(state.final_ids):
                rule = self.get_matching_rule(section.rules, state.final_ids)
                if rule is not None and rule.section_action is not None and rule.section_action[0] is not None:
                    label = rule.section_action[0]
                    destination = None
                    if label == "exit":
                        destination = self.exit_vertices[id]
                    elif label in ("enter", "switch"):
                        destination_section = self.rules_file.sections[rule.section_action[1]]
                        start_state = self.get_state_machine(destination_section).start_state
                        # Returns the existing vertex or allocates a new one.
                        destination = self.allocate_vertex(start_state)
                    if destination is not None:
                        self.draw_edge(code, source, destination, label)
class XmlPlugin(PluginTemplate):
    """
    Plugin which writes the sections, rules and state machines of a rules
    file as an XML document rooted at a ``LexicalAnalyzer`` element.
    """
    def __init__(self, rules_file, dependencies, plugin_options):
        """
        @param rules_file: object exposing a ``sections`` mapping of section id to section
        @param dependencies: mapping which must contain an "ElementTreeFactory" entry
        @param plugin_options: options object; ``file_name`` and ``form`` are read from it
        """
        self.rules_file = rules_file
        self.dependencies = dependencies
        self.plugin_options = plugin_options

        # Fall back to a default base name when none was configured
        configured_name = self.plugin_options.file_name
        self.file_name = "LexicalAnalyzer" if configured_name is None else configured_name

        # All three formatters share the 'ids' cache name
        self.cache = CachedFormatter(limit=64, reserved=[])
        self.cache.add_cache('id', lambda section_id: section_id or 'Anonymous', 'ids')
        self.cache.add_cache('rule', lambda rule_key: rule_key[1], 'ids')
        self.cache.add_cache('state', lambda state_key: state_key[1], 'ids')

    # PluginTemplate interface
    def process(self, token):
        """
        Generate the document when the "ROOT" token is received; any other
        token is ignored.
        """
        if token.token != "ROOT":
            return
        self.generate_root(token)

    def get_output_directories(self):
        """Return the directories to create (none)."""
        return []

    def get_files_to_copy(self):
        """Return the files to copy verbatim (none)."""
        return []

    def get_files_to_generate(self):
        """Return a single (template name, output file name) pair for the XML file."""
        output_name = self.file_name + ".xml"
        return [("LexicalAnalyzer.xml", output_name)]

    # Private utility methods
    def get_form(self):
        """Return "NFA" if the configured form is the NFA IR, "DFA" otherwise."""
        options = self.plugin_options
        if options.form == options.NFA_IR:
            return "NFA"
        return "DFA"

    @staticmethod
    def get_rules(ids, section):
        """Lazily yield the rules of ``section`` whose id is contained in ``ids``."""
        return (candidate for candidate in section.rules if candidate.id in ids)

    @staticmethod
    def get_rule_name(rule):
        """Return the name of ``rule``, or 'Anonymous' if the name is empty or None."""
        name = rule.name
        return name if name else 'Anonymous'

    @staticmethod
    def get_rule_names(ids, section):
        """Lazily yield the display name of every rule of ``section`` selected by ``ids``."""
        return (XmlPlugin.get_rule_name(selected)
                for selected in XmlPlugin.get_rules(ids, section))

    def format_state_id(self, state, section):
        """
        Return the cached identifier of ``state``. The suggested name is the
        concatenation of the names of the rules the state belongs to, or
        'State' when that concatenation is empty.
        """
        joined_names = ''.join(self.get_rule_names(state.ids, section))
        state_key = (state, joined_names or 'State')
        return self.cache.get_state(state_key)

    def format_rule_id(self, rule, section):
        """Return the cached identifier of ``rule``, keyed by the rule's id."""
        rule_key = (rule.id, self.get_rule_name(rule) or 'Rule')
        return self.cache.get_rule(rule_key)

    # Generation methods
    def generate_root(self, token):
        """Build the whole document and pretty-print it to ``token.stream``."""
        E = self.dependencies["ElementTreeFactory"]._E()
        form_element = E.Form(self.get_form())
        sections_element = E.Sections(*list(self.generate_sections(E)))
        document = E.LexicalAnalyzer(
            form_element,
            sections_element,
            xmlns="https://github.com/parkertomatoes/poodle-lex")
        pretty_print(document, token.stream)

    def generate_sections(self, E):
        """Yield one Section element for each section of the rules file."""
        for section_id, section in self.rules_file.sections.items():
            yield self.generate_section(E, section_id, section)

    def generate_section(self, E, id, section):
        """
        Build the Section element: its rules, then its DFA if the section has
        a ``dfa`` attribute, then the 'id', 'inherits' and 'exits' attributes.
        """
        # Children are built before the attributes so that identifiers are
        # requested from the cache in a fixed order.
        children = [E.Rules(*list(self.generate_rules(E, section)))]
        children = children + maybe(
            hasattr(section, 'dfa'),
            lambda: self.generate_state_machine(E, section, section.dfa))
        attributes = {
            'id': self.cache.get_id(id),
            'inherits': str(section.inherits).lower(),
            'exits': str(section.exits).lower()
        }
        return E.Section(*children, **attributes)

    def generate_rules(self, E, section):
        """Yield one Rule element for each rule of ``section``."""
        for rule in section.rules:
            yield self.generate_rule(E, section, rule)

    def generate_rule(self, E, section, rule):
        """
        Build the Rule element: optional Actions, optional SectionAction,
        optional NFA state machine, then the 'name', 'id' and (when present
        on the rule) 'line_number' attributes.
        """
        def build_actions():
            return E.Actions(*[E.Action(action) for action in rule.action])

        def build_section_action():
            parts = [E.Action(rule.section_action[0])]
            parts = parts + maybe(
                rule.section_action[1] is not None,
                lambda: E.Section(rule.section_action[1]))
            return E.SectionAction(*parts)

        def build_nfa():
            return self.generate_state_machine(E, section, rule.nfa)

        # Child elements first, in document order
        children = maybe(any(rule.action), build_actions)
        children = children + maybe(
            rule.section_action is not None and rule.section_action[0] is not None,
            build_section_action)
        children = children + maybe(hasattr(rule, 'nfa'), build_nfa)

        # Attributes last: name, id, then the optional line number
        has_line_number = hasattr(rule, 'line_number')
        attributes = {
            'name': self.get_rule_name(rule),
            'id': self.format_rule_id(rule, section)
        }
        if has_line_number:
            attributes['line_number'] = str(rule.line_number)
        return E.Rule(*children, **attributes)

    def generate_state_machine(self, E, section, state_machine):
        """
        Build the StateMachine element: all states, then the 'start'
        attribute and, if the machine has an ``end_state``, the 'end' attribute.
        """
        states = list(self.generate_states(E, section, state_machine))
        has_end_state = hasattr(state_machine, 'end_state')
        attributes = {
            'start': self.format_state_id(state_machine.start_state, section)
        }
        if has_end_state:
            attributes['end'] = self.format_state_id(state_machine.end_state, section)
        return E.StateMachine(*states, **attributes)

    def generate_states(self, E, section, state_machine):
        """Yield one State element for each state obtained by iterating ``state_machine``."""
        for state in state_machine:
            yield self.generate_state(E, section, state)

    def generate_state(self, E, section, state):
        """Build the State element: Ids, FinalIds, Transitions, then the 'id' attribute."""
        ids_element = E.Ids(*self.generate_state_ids(E, section, state.ids))
        final_ids_element = E.FinalIds(
            *self.generate_state_ids(E, section, state.final_ids))
        transitions_element = E.Transitions(
            *list(self.generate_transitions(E, section, state)))
        # The state's own identifier is requested last, after its children
        state_id = self.format_state_id(state, section)
        return E.State(ids_element, final_ids_element, transitions_element, id=state_id)

    def generate_state_ids(self, E, section, ids):
        """Yield an Id element for each rule of ``section`` selected by ``ids``."""
        for rule in self.get_rules(ids, section):
            yield E.Id(self.format_rule_id(rule, section))

    def generate_transitions(self, E, section, state):
        """
        Return the list of Transition elements of ``state``: the edge
        transitions, followed by the epsilon transitions if the state has an
        ``epsilon_edges`` attribute.
        """
        edge_transitions = list(self.generate_edge_transitions(E, section, state))
        epsilon_transitions = maybe_list(
            hasattr(state, 'epsilon_edges'),
            lambda: self.generate_epsilon_transitions(E, section, state))
        return edge_transitions + epsilon_transitions

    def generate_edge_transitions(self, E, section, state):
        """Yield one Transition element per (destination, edge) entry of ``state.edges``."""
        for destination, edge in state.edges.items():
            ranges = list(self.generate_edge(E, section, state, destination, edge))
            destination_id = self.format_state_id(destination, section)
            yield E.Transition(*ranges, Destination=destination_id)

    def generate_edge(self, E, section, state, destination, edge):
        """
        Yield a Codepoint element for each pair of ``edge`` whose bounds are
        equal, and a Range element with 'start'/'end' attributes otherwise.
        """
        for low, high in edge:
            if low == high:
                element = E.Codepoint(str(low))
            else:
                element = E.Range(start=str(low), end=str(high))
            yield element

    def generate_epsilon_transitions(self, E, section, state):
        """Yield a Transition element with no content for each epsilon destination."""
        for destination in state.epsilon_edges:
            destination_id = self.format_state_id(destination, section)
            yield E.Transition(Destination=destination_id)
class VariableFormatter(object):
    """
    Formats identifiers (section ids, token ids, type and method names) for
    generated code in which names are scoped with ``::``.
    """
    def __init__(self, plugin_options, reserved_ids):
        """
        @param plugin_options: options object; ``class_name`` is read from it
        @param reserved_ids: identifiers handed to the cache as reserved, and
            later to the state id formatter
        """
        def section_id_formatter(section_id):
            # Drop the first dotted component, strip colons, upper-case the
            # remaining components and join them with underscores.
            if section_id is None:
                return ''
            components = [part.upper().replace(':', '') for part in section_id.split('.')]
            return '_'.join(components[1:])

        def token_id_formatter(token_id):
            if token_id is None:
                return None
            return token_id.upper()

        self.plugin_options = plugin_options
        self.reserved_ids = reserved_ids

        cache = CachedFormatter(limit=64, reserved=reserved_ids)
        self.cache = cache
        cache.add_cache('section_id', section_id_formatter, cache_name='section_and_tokens')
        cache.add_cache('token_id', token_id_formatter, cache_name='section_and_tokens')

        # Pre-assigned identifiers
        cache.add_token_id(None, 'ANONYMOUS')
        cache.add_token_id('endofstream', 'ENDOFSTREAM')
        cache.add_token_id('invalidcharacter', 'INVALIDCHARACTER')
        cache.add_section_id(None, '')
        cache.add_section_id('::main::', 'MAIN')

        # Re-expose the cache's get_*/add_*/clear_* attributes on this object
        forwarded_prefixes = ('get_', 'add_', 'clear_')
        for name in dir(self.cache):
            if name.startswith(forwarded_prefixes):
                setattr(self, name, getattr(self.cache, name))

    def get_class_name(self):
        """Return the class name configured in the plugin options."""
        return self.plugin_options.class_name

    def get_scoped(self, id, is_relative):
        """
        Return ``id`` unchanged when ``is_relative`` is true, otherwise
        ``id`` qualified as '<class name>::<id>'.
        """
        if is_relative:
            return id
        return '{class_name}::{type}'.format(class_name=self.get_class_name(), type=id)

    def get_state_machine_method_name(self, section, is_relative):
        """
        Return the name of the method for ``section``: 'get_token', followed
        by '_<lower-cased section id>' when the section id is not empty, and
        scoped according to ``is_relative``.
        """
        lowered_id = self.cache.get_section_id(section).lower()
        suffix = '_' + lowered_id if lowered_id != '' else lowered_id
        return self.get_scoped('get_token{id}'.format(id=suffix), is_relative)

    def get_type(self, type_name, is_relative):
        """
        Return the type text for 'mode_stack', 'mode', 'token' or 'state'.
        'state' is always returned unscoped; the others follow ``is_relative``.
        Raises Exception for any other ``type_name``.
        """
        if type_name == 'mode_stack':
            mode_type = self.get_type('mode', is_relative)
            return "std::stack<{mode_type}>".format(mode_type=mode_type)
        if type_name == 'state':
            return 'State'
        if type_name == 'mode':
            return self.get_scoped('Mode', is_relative)
        if type_name == 'token':
            return self.get_scoped('Token', is_relative)
        raise Exception("unrecognized type '{id}'".format(id=type_name))

    def get_state_id_formatter(self, rules):
        """Return a StateIdFormatter for ``rules`` using this object's reserved ids."""
        return StateIdFormatter(rules, self.reserved_ids)
class VariableFormatter(object):
    """
    Formats identifiers (section ids, token ids, type and method names) for
    generated code in which names are scoped with ``.`` and which
    distinguishes the user's namespace from the support library's namespace.
    """
    def __init__(self, plugin_options, reserved_ids, poodle_namespace):
        """
        @param plugin_options: options object; ``class_name`` and ``namespace`` are read from it
        @param reserved_ids: identifiers handed to the cache as reserved, and
            later to the state id formatter
        @param poodle_namespace: namespace used to qualify support library
            types (those formatted with ``is_custom_namespace=False``)
        """
        def section_id_formatter(section_id):
            # Drop the first dotted component, strip colons and concatenate
            # the remaining components without a separator.
            if section_id is None:
                return ''
            path = [i.replace(':', '') for i in section_id.split('.')]
            return ''.join(path[1:])

        def token_id_formatter(id):
            # Token ids are used as written
            return id

        self.poodle_namespace = poodle_namespace
        self.plugin_options = plugin_options
        self.reserved_ids = reserved_ids
        self.cache = CachedFormatter(limit=64, reserved=reserved_ids)
        self.cache.add_cache('section_id', section_id_formatter, cache_name='section_and_tokens')
        self.cache.add_cache('token_id', token_id_formatter, cache_name='section_and_tokens')
        self.cache.add_section_id(None, '')
        self.cache.add_section_id('::main::', 'Main')

        # Re-expose the cache's get_*/add_*/clear_* attributes on this object
        for attr in dir(self.cache):
            if attr.startswith(('get_', 'add_', 'clear_')):
                setattr(self, attr, getattr(self.cache, attr))

    def get_class_name(self):
        """Return the class name configured in the plugin options."""
        return self.plugin_options.class_name

    def get_default_encoding(self):
        """Return the relative name of the support library's default string encoding."""
        return self.get_scoped('Unicode.DefaultStringEncoding', is_relative=True, is_custom_namespace=False)

    def get_mode_stack_class_name(self):
        """Return the mode stack class name: the class name followed by 'Mode'."""
        return self.get_class_name() + "Mode"

    def get_namespace(self):
        """Return the namespace configured in the plugin options."""
        return self.plugin_options.namespace

    def get_scoped(self, id, is_relative, is_custom_namespace):
        """
        Qualify ``id`` with a namespace.

        @param id: the identifier to qualify
        @param is_relative: if true, ``id`` is returned unchanged, except for
            a support library id when the configured namespace differs from
            the support library namespace, which gets that namespace
            prepended. If false, the configured namespace is always prepended.
        @param is_custom_namespace: true if ``id`` lives in the configured
            namespace, false if it lives in the support library namespace
        @return: the qualified identifier
        """
        if is_relative:
            if not is_custom_namespace and self.plugin_options.namespace != self.poodle_namespace:
                # Fixed: this branch used to format the undefined name
                # ``type_text`` and raised NameError whenever it was taken.
                return '{poodle}.{type}'.format(poodle=self.poodle_namespace, type=id)
            return id
        return '{namespace}.{type}'.format(namespace=self.plugin_options.namespace, type=id)

    def get_state_machine_method_name(self, section, is_relative):
        """
        Return 'GetToken<section id>', prefixed with the class name and the
        configured namespace when ``is_relative`` is false.
        """
        method_name = 'GetToken{id}'.format(id=self.cache.get_section_id(section))
        if not is_relative:
            method_name = '{class_name}.{method_name}'.format(
                class_name=self.get_class_name(),
                method_name=method_name)
        return self.get_scoped(method_name, is_relative, is_custom_namespace=True)

    def get_type(self, type_name, is_relative):
        """
        Return the type text for one of 'mode', 'token', 'text', 'encoding',
        'character' or 'stream', scoped according to ``is_relative``.
        'mode' and 'token' live in the configured namespace; the others live
        in the support library namespace.
        Raises Exception for any other ``type_name``.
        """
        if type_name == 'mode':
            is_custom_namespace = True
            type_text = "{class_name}.ModeId".format(class_name=self.get_mode_stack_class_name())
        elif type_name == 'token':
            is_custom_namespace = True
            type_text = "{class_name}Token".format(class_name=self.plugin_options.class_name)
        elif type_name == 'text':
            is_custom_namespace = False
            type_text = 'Unicode.Text'
        elif type_name == 'encoding':
            is_custom_namespace = False
            type_text = 'Unicode.StringEncoding'
        elif type_name == 'character':
            is_custom_namespace = False
            type_text = 'Unicode.Codepoint'
        elif type_name == 'stream':
            is_custom_namespace = False
            type_text = 'CharacterStream'
        else:
            raise Exception("unrecognized type '{id}'".format(id=type_name))
        return self.get_scoped(type_text, is_relative, is_custom_namespace)

    def get_state_id_formatter(self, rules):
        """Return a StateIdFormatter for ``rules`` using this object's reserved ids."""
        return StateIdFormatter(rules, self.reserved_ids)

    def get_unicode_char_name(self, codepoint):
        """
        Return a symbolic name for ``codepoint``.

        The name is '<NAMESPACE>_UCS_<UNICODE NAME>', upper-cased, with
        spaces and hyphens replaced by underscores, truncated to 64
        characters. If the code point has no Unicode name, or cannot be
        converted to a character, a hexadecimal literal of the form '&hNN'
        is returned instead.
        """
        # ``unichr`` only exists on Python 2; ``chr`` is its Python 3 equivalent
        try:
            to_character = unichr
        except NameError:
            to_character = chr
        try:
            unicode_name = unicodedata.name(to_character(codepoint)).replace(' ', '_').replace('-', '_')
            return "{namespace}_UCS_{name}".format(
                namespace=self.get_namespace().upper(),
                name=unicode_name.upper())[:64]
        except ValueError:
            return "&h%02x" % codepoint