def generate_content(self, token):
    code = CodeEmitter(token.stream)
    code.indent()

    # Generate the state machine factory
    self.get_state_machine = lambda section: section.dfa
    if self.plugin_options.form == self.plugin_options.NFA_IR:
        nfas = {}
        for section in self.rules_file.sections.values():
            nfas[section] = section.rules[0].nfa.alternate(rule.nfa for rule in section.rules)
        self.get_state_machine = lambda section: nfas[section]

    # Generate vertices
    for id, section in self.rules_file.sections.items():
        formatted_id = self.cache.get_section(id)
        with code.block("subgraph {0} {{".format(formatted_id), "}"):
            code.line('label="{0}"'.format(self.get_section_label(section, id)))
            self.generate_vertices(code, formatted_id, section)
    for vertex in self.exit_vertices.values():
        self.draw_vertex(code, id=vertex, label="exit section", is_shapeless=True)
    code.line()

    # Generate edges
    for id, section in self.rules_file.sections.items():
        formatted_id = self.cache.get_section(id)
        self.generate_edges(code, formatted_id, section)
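# --- Illustrative sketch (not part of the original source) ---
# The methods in this file rely on a CodeEmitter whose constructor takes a stream and
# an optional starting indent, and which provides line(), indent(), and a block()
# context manager. The class below is a minimal, assumed reconstruction of that
# interface for readability; the project's real CodeEmitter may differ.
import contextlib


class CodeEmitterSketch(object):
    def __init__(self, stream, indent=0):
        self.stream = stream
        self.depth = indent

    def indent(self):
        # Increase the current indentation level by one step.
        self.depth += 1

    def line(self, text=""):
        # Write a single indented line; calling with no argument emits a blank line.
        if text:
            self.stream.write("    " * self.depth + text + "\n")
        else:
            self.stream.write("\n")

    @contextlib.contextmanager
    def block(self, opening, closing):
        # Emit the opening line, indent the body, then emit the closing line.
        self.line(opening)
        self.depth += 1
        try:
            yield
        finally:
            self.depth -= 1
            self.line(closing)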
def process(self, token):
    if token.token == "BASE_CLASS":
        if len(self.lexical_analyzer.sections) > 1:
            token.stream.write(self.formatter.get_mode_stack_class_name())
        else:
            token.stream.write("Object")
    elif token.token == "BASE_FILE_NAME":
        token.stream.write(self.plugin_options.file_name)
    elif token.token == "CLASS_NAME":
        token.stream.write(self.formatter.get_class_name())
    elif token.token == "DEFAULT_ENCODING":
        token.stream.write(self.formatter.get_default_encoding())
    elif token.token == "ENUM_MODE_IDS":
        code = CodeEmitter(token.stream, token.indent)
        for section in self.lexical_analyzer.sections:
            code.line(self.formatter.get_section_id(section))
    elif token.token == "ENUM_TOKEN_IDS":
        code = CodeEmitter(token.stream, token.indent)
        rules = [id for id in self.lexical_analyzer.rule_ids.values() if id is not None]
        rules.append("SkippedToken")
        for rule in sorted(rules):
            code.line(self.formatter.get_token_id(rule))
    elif token.token == "HEADER_GUARD_NAME":
        token.stream.write("{namespace}_{class_name}_BI".format(
            namespace=self.plugin_options.namespace.upper(),
            class_name=self.plugin_options.class_name.upper()))
    elif token.token == "HEADER_GUARD_MODE_NAME":
        token.stream.write("{namespace}_{class_name}_BI".format(
            namespace=self.plugin_options.namespace.upper(),
            class_name=self.formatter.get_mode_stack_class_name().upper()))
    elif token.token == "INITIAL_MODE_ID":
        token.stream.write(self.formatter.get_section_id("::main::"))
    elif token.token == "MODE_INCLUDE":
        if len(self.lexical_analyzer.sections) > 1:
            code = CodeEmitter(token.stream, token.indent)
            code.line('#include "{include_file}"'.format(
                include_file=self.plugin_options.file_name + "ModeStack.bi"))
    elif token.token == "MODE_SOURCE":
        if len(self.lexical_analyzer.sections) > 1:
            token.stream.write("../{base_file_name}ModeStack.bas".format(
                base_file_name=self.plugin_options.file_name))
    elif token.token == "MODE_STACK_CLASS_NAME":
        token.stream.write(self.formatter.get_mode_stack_class_name())
    elif token.token == "NAMESPACE":
        token.stream.write(self.formatter.get_namespace())
    elif token.token == "STACK_DEPTH_ID":
        token.stream.write("{namespace}_{class_name}_STACK_DEPTH".format(
            namespace=self.plugin_options.namespace.upper(),
            class_name=self.formatter.get_mode_stack_class_name().upper()))
    elif token.token == "STACK_DEPTH":
        token.stream.write("32")
    elif token.token == "STATE_MACHINE_DECLARATIONS":
        if len(self.lexical_analyzer.sections) > 1:
            code = CodeEmitter(token.stream, token.indent)
            for section in self.lexical_analyzer.sections:
                code.line("Declare Function {method_name}() As {token_type}".format(
                    method_name=self.formatter.get_state_machine_method_name(section, True),
                    token_type=self.formatter.get_type("token", True)))
    elif token.token == "STATE_MACHINES":
        code = CodeEmitter(token.stream, token.indent)
        if len(self.lexical_analyzer.sections) > 1:
            self.StateMachineEmitter.generate_state_machine_switch(code, self.formatter, self.lexical_analyzer)
        for i, section in enumerate(self.lexical_analyzer.sections):
            code.line()
            emitter = self.StateMachineEmitter(
                code=code,
                formatter=self.formatter,
                dfa_ir=self.lexical_analyzer,
                section_id=section)
            emitter.generate_state_machine()
    elif token.token == "TOKEN_IDNAMES":
        code = CodeEmitter(token.stream, token.indent)
        filtered_ids = [rule_id for rule_id in self.lexical_analyzer.rule_ids.values() if rule_id is not None]
        filtered_ids.append("SkippedToken")
        for i, rule in enumerate(sorted(filtered_ids)):
            template = '@"{name}"'.format(name=rule)
            # FreeBASIC line continuation: a trailing comma on every entry but the last
            template += ", _" if i < len(filtered_ids) - 1 else " _"
            code.line(template)
    elif token.token == "TOKEN_IDNAMES_LIMIT":
        token.stream.write(str(len(self.lexical_analyzer.rule_ids) + 2))
    elif token.token.startswith("TYPE_REL_"):
        type_name = token.token[len("TYPE_REL_"):].lower()
        token.stream.write(self.formatter.get_type(type_name, True))
    elif token.token.startswith("TYPE_"):
        type_name = token.token[len("TYPE_"):].lower()
        token.stream.write(self.formatter.get_type(type_name, False))
    elif token.token == "UNICODE_CHAR_DEFINES":
        namespace = self.plugin_options.namespace.upper()
        all_range_edges = set()
        for section in self.lexical_analyzer.sections.values():
            for state in section.dfa:
                for edge in state.edges.values():
                    for minv, maxv in edge:
                        all_range_edges.update((minv, maxv))
        for codepoint in all_range_edges:
            name = self.formatter.get_unicode_char_name(codepoint)
            if name[0] != "&":
                token.stream.write("#define {name} {codepoint}\n".format(
                    name=name, codepoint="&h%02x" % codepoint))
    elif token.token == "UNICODE_HEADER_GUARD_NAME":
        namespace = self.plugin_options.namespace.upper()
        class_name = self.plugin_options.class_name.upper()
        token.stream.write("_".join((namespace, class_name, "UCS_DEFINES")))
    else:
        raise Exception("Token '{id}' not recognized".format(id=token.token))
def process(self, token):
    if token.token == 'BASE_FILE_NAME':
        token.stream.write(self.base_file_name)
    elif token.token == 'CLASS_NAME':
        token.stream.write(self.class_name)
    elif token.token == 'ENUM_TOKEN_IDS':
        ids = sorted(rule for rule in self.dfa_ir.rule_ids.values() if rule is not None)
        ids.insert(0, 'invalidcharacter')
        ids.insert(0, 'endofstream')
        ids.insert(0, 'skippedtoken')
        for i, id in enumerate(ids):
            id_key = self.formatter.get_token_id(id)
            if i != len(ids) - 1:
                token.stream.write('{indent}{id},\n'.format(indent=' ' * token.indent, id=id_key))
            else:
                token.stream.write('{indent}{id}\n'.format(indent=' ' * token.indent, id=id_key))
    elif token.token == 'ENUM_SECTION_IDS':
        code = CodeEmitter(token.stream, token.indent)
        code.line()
        if len(self.dfa_ir.sections) > 1:
            code.line('enum Mode')
            with code.block('{', '};'):
                ids = sorted(self.dfa_ir.sections)
                for i, id in enumerate(ids):
                    formatted = self.formatter.get_section_id(id)
                    if i == len(ids) - 1:
                        code.line(formatted)
                    else:
                        code.line('{id},'.format(id=formatted))
            code.line()
    elif token.token == 'HEADER_GUARD':
        token.stream.write("{namespace}_{basefile}_H".format(
            namespace=self.namespace.replace(':', ''),
            basefile=self.base_file_name.upper()))
    elif token.token == 'INCLUDES':
        token.stream.write("{indent}#include \"{file_name}.h\"".format(
            indent=' ' * token.indent,
            file_name=self.base_file_name))
    elif token.token == 'MODE_STACK_DECLARATION':
        token.stream.write('{indent}std::istream* stream;\n'.format(indent=' ' * token.indent))
        if len(self.dfa_ir.sections) > 1:
            token.stream.write('{indent}{stack_type} mode;\n'.format(
                indent=' ' * token.indent,
                stack_type=self.formatter.get_type('mode_stack', is_relative=True)))
    elif token.token == 'MODE_STACK_INCLUDE':
        if len(self.dfa_ir.sections) > 1:
            token.stream.write('#include <stack>\n\n')
    elif token.token == 'NAMESPACE':
        token.stream.write(self.namespace)
    elif token.token == 'PUSH_INITIAL_MODE':
        if len(self.dfa_ir.sections) > 1:
            token.stream.write('{indent}this->mode.push({initial_mode});\n'.format(
                indent=' ' * token.indent,
                initial_mode=self.formatter.get_section_id('::main::')))
    elif token.token == 'SELECT_ID_STRING':
        ids = sorted(rule for rule in self.dfa_ir.rule_ids.values() if rule is not None)
        code = CodeEmitter(token.stream, token.indent)
        for id in ids:
            with code.block('case {class_name}::Token::{token_id}:'.format(
                    class_name=self.class_name,
                    token_id=self.formatter.get_token_id(id))):
                code.line('id_string = "{id}";'.format(id=id))
                code.line('break;')
    elif token.token == 'STATE_MACHINE_METHOD_DECLARATIONS':
        token.stream.write("{indent}{token_type} get_token();\n".format(
            indent=' ' * token.indent,
            token_type=self.formatter.get_type('token', is_relative=True)))
        if len(self.dfa_ir.sections) > 1:
            for section_id in self.dfa_ir.sections:
                token.stream.write("{indent}{token_type} {method_name}();\n".format(
                    indent=' ' * token.indent,
                    token_type=self.formatter.get_type('token', is_relative=True),
                    method_name=self.formatter.get_state_machine_method_name(section_id, is_relative=True)))
    elif token.token == 'STATE_MACHINES':
        code = CodeEmitter(token.stream, token.indent)
        if len(self.dfa_ir.sections) > 1:
            self.emit_state_machine_switch(code)
        for section in self.dfa_ir.sections:
            state_machine_emitter = self.StateMachineEmitter(self.dfa_ir, section, self.formatter, code)
            state_machine_emitter.emit_state_machine()
    else:
        raise Exception("Unrecognized token: {0}".format(token.token))
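# --- Illustrative sketch (not part of the original source) ---
# Example of how a template expander might drive a plugin's process() method. The
# TemplateToken container, the expand_placeholder() helper, and the plugin variable
# below are hypothetical; they are inferred only from the attribute usage above
# (token.token, token.stream, token.indent).
import io
from collections import namedtuple

TemplateToken = namedtuple("TemplateToken", ["token", "stream", "indent"])


def expand_placeholder(plugin, name, indent=0):
    # Ask the plugin to expand one template placeholder and return the emitted text.
    out = io.StringIO()
    plugin.process(TemplateToken(token=name, stream=out, indent=indent))
    return out.getvalue()

# Usage (hypothetical): expand_placeholder(cpp_plugin, "HEADER_GUARD") returns the
# header-guard identifier written by the HEADER_GUARD branch above.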